| 
     1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
    5  *
    6  * This software is available to you under a choice of one of two
    7  * licenses.  You may choose to be licensed under the terms of the GNU
    8  * General Public License (GPL) Version 2, available from the file
    9  * COPYING in the main directory of this source tree, or the
   10  * OpenIB.org BSD license below:
   11  *
   12  *     Redistribution and use in source and binary forms, with or
   13  *     without modification, are permitted provided that the following
   14  *     conditions are met:
   15  *
   16  *      - Redistributions of source code must retain the above
   17  *        copyright notice, this list of conditions and the following
   18  *        disclaimer.
   19  *
   20  *      - Redistributions in binary form must reproduce the above
   21  *        copyright notice, this list of conditions and the following
   22  *        disclaimer in the documentation and/or other materials
   23  *        provided with the distribution.
   24  *
   25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   32  * SOFTWARE.
   33  */
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include "opt_inet.h"
   38 
   39 #ifdef TCP_OFFLOAD
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/kernel.h>
   43 #include <sys/ktr.h>
   44 #include <sys/bus.h>
   45 #include <sys/lock.h>
   46 #include <sys/mutex.h>
   47 #include <sys/rwlock.h>
   48 #include <sys/socket.h>
   49 #include <sys/sbuf.h>
   50 
   51 #include "iw_cxgbe.h"
   52 #include "user.h"
   53 
   54 static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
   55                       struct c4iw_dev_ucontext *uctx)
   56 {
   57         struct adapter *sc = rdev->adap;
   58         struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
   59         struct fw_ri_res_wr *res_wr;
   60         struct fw_ri_res *res;
   61         int wr_len;
   62         struct c4iw_wr_wait wr_wait;
   63         struct wrqe *wr;
   64 
   65         wr_len = sizeof *res_wr + sizeof *res;
   66         wr = alloc_wrqe(wr_len, &sc->sge.ctrlq[0]);
   67                 if (wr == NULL)
   68                         return (0);
   69         res_wr = wrtod(wr);
   70         memset(res_wr, 0, wr_len);
   71         res_wr->op_nres = cpu_to_be32(
   72                         V_FW_WR_OP(FW_RI_RES_WR) |
   73                         V_FW_RI_RES_WR_NRES(1) |
   74                         F_FW_WR_COMPL);
   75         res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
   76         res_wr->cookie = (unsigned long) &wr_wait;
   77         res = res_wr->res;
   78         res->u.cq.restype = FW_RI_RES_TYPE_CQ;
   79         res->u.cq.op = FW_RI_RES_OP_RESET;
   80         res->u.cq.iqid = cpu_to_be32(cq->cqid);
   81 
   82         c4iw_init_wr_wait(&wr_wait);
   83 
   84         t4_wrq_tx(sc, wr);
   85 
   86         c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);
   87 
   88         kfree(cq->sw_queue);
   89         dma_free_coherent(rhp->ibdev.dma_device,
   90                           cq->memsize, cq->queue,
   91                           dma_unmap_addr(cq, mapping));
   92         c4iw_put_cqid(rdev, cq->cqid, uctx);
   93         return 0;
   94 }
   95 
   96 static int
   97 create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
   98     struct c4iw_dev_ucontext *uctx)
   99 {
  100         struct adapter *sc = rdev->adap;
  101         struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
  102         struct fw_ri_res_wr *res_wr;
  103         struct fw_ri_res *res;
  104         int wr_len;
  105         int user = (uctx != &rdev->uctx);
  106         struct c4iw_wr_wait wr_wait;
  107         int ret;
  108         struct wrqe *wr;
  109         u64 cq_bar2_qoffset = 0;
  110 
  111         cq->cqid = c4iw_get_cqid(rdev, uctx);
  112         if (!cq->cqid) {
  113                 ret = -ENOMEM;
  114                 goto err1;
  115         }
  116 
  117         if (!user) {
  118                 cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
  119                 if (!cq->sw_queue) {
  120                         ret = -ENOMEM;
  121                         goto err2;
  122                 }
  123         }
  124         cq->queue = dma_alloc_coherent(rhp->ibdev.dma_device, cq->memsize,
  125                                        &cq->dma_addr, GFP_KERNEL);
  126         if (!cq->queue) {
  127                 ret = -ENOMEM;
  128                 goto err3;
  129         }
  130         dma_unmap_addr_set(cq, mapping, cq->dma_addr);
  131         memset(cq->queue, 0, cq->memsize);
  132 
  133         /* build fw_ri_res_wr */
  134         wr_len = sizeof *res_wr + sizeof *res;
  135 
  136         wr = alloc_wrqe(wr_len, &sc->sge.ctrlq[0]);
  137         if (wr == NULL)
  138                 return (0);
  139         res_wr = wrtod(wr);
  140 
  141         memset(res_wr, 0, wr_len);
  142         res_wr->op_nres = cpu_to_be32(
  143                         V_FW_WR_OP(FW_RI_RES_WR) |
  144                         V_FW_RI_RES_WR_NRES(1) |
  145                         F_FW_WR_COMPL);
  146         res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
  147         res_wr->cookie = (unsigned long) &wr_wait;
  148         res = res_wr->res;
  149         res->u.cq.restype = FW_RI_RES_TYPE_CQ;
  150         res->u.cq.op = FW_RI_RES_OP_WRITE;
  151         res->u.cq.iqid = cpu_to_be32(cq->cqid);
  152         //Fixme: Always use first queue id for IQANDSTINDEX. Linux does the same.
  153         res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
  154                         V_FW_RI_RES_WR_IQANUS(0) |
  155                         V_FW_RI_RES_WR_IQANUD(1) |
  156                         F_FW_RI_RES_WR_IQANDST |
  157                         V_FW_RI_RES_WR_IQANDSTINDEX(sc->sge.ofld_rxq[0].iq.abs_id));
  158         res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
  159                         F_FW_RI_RES_WR_IQDROPRSS |
  160                         V_FW_RI_RES_WR_IQPCIECH(2) |
  161                         V_FW_RI_RES_WR_IQINTCNTTHRESH(0) |
  162                         F_FW_RI_RES_WR_IQO |
  163                         V_FW_RI_RES_WR_IQESIZE(1));
  164         res->u.cq.iqsize = cpu_to_be16(cq->size);
  165         res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
  166 
  167         c4iw_init_wr_wait(&wr_wait);
  168 
  169         t4_wrq_tx(sc, wr);
  170 
  171         CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait);
  172         ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);
  173         if (ret)
  174                 goto err4;
  175 
  176         cq->gen = 1;
  177         cq->rdev = rdev;
  178 
  179         /* Determine the BAR2 queue offset and qid. */
  180         t4_bar2_sge_qregs(rdev->adap, cq->cqid, T4_BAR2_QTYPE_INGRESS, user,
  181                         &cq_bar2_qoffset, &cq->bar2_qid);
  182 
  183         /* If user mapping then compute the page-aligned physical
  184          * address for mapping.
  185          */
  186         if (user)
  187                 cq->bar2_pa = (rdev->bar2_pa + cq_bar2_qoffset) & PAGE_MASK;
  188         else
  189                 cq->bar2_va = (void __iomem *)((u64)rdev->bar2_kva +
  190                         cq_bar2_qoffset);
  191 
  192         return 0;
  193 err4:
  194         dma_free_coherent(rhp->ibdev.dma_device, cq->memsize, cq->queue,
  195                           dma_unmap_addr(cq, mapping));
  196 err3:
  197         kfree(cq->sw_queue);
  198 err2:
  199         c4iw_put_cqid(rdev, cq->cqid, uctx);
  200 err1:
  201         return ret;
  202 }
  203 
  204 static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
  205 {
  206         struct t4_cqe cqe;
  207 
  208         CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
  209             cq, cq->sw_cidx, cq->sw_pidx);
  210         memset(&cqe, 0, sizeof(cqe));
  211         cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
  212                                  V_CQE_OPCODE(FW_RI_SEND) |
  213                                  V_CQE_TYPE(0) |
  214                                  V_CQE_SWCQE(1) |
  215                                  V_CQE_QPID(wq->sq.qid));
  216         cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
  217         cq->sw_queue[cq->sw_pidx] = cqe;
  218         t4_swcq_produce(cq);
  219 }
  220 
  221 int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
  222 {
  223         int flushed = 0;
  224         int in_use = wq->rq.in_use - count;
  225 
  226         BUG_ON(in_use < 0);
  227         CTR5(KTR_IW_CXGBE, "%s wq %p cq %p rq.in_use %u skip count %u",
  228             __func__, wq, cq, wq->rq.in_use, count);
  229         while (in_use--) {
  230                 insert_recv_cqe(wq, cq);
  231                 flushed++;
  232         }
  233         return flushed;
  234 }
  235 
  236 static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
  237                           struct t4_swsqe *swcqe)
  238 {
  239         struct t4_cqe cqe;
  240 
  241         CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
  242             cq, cq->sw_cidx, cq->sw_pidx);
  243         memset(&cqe, 0, sizeof(cqe));
  244         cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
  245                                  V_CQE_OPCODE(swcqe->opcode) |
  246                                  V_CQE_TYPE(1) |
  247                                  V_CQE_SWCQE(1) |
  248                                  V_CQE_QPID(wq->sq.qid));
  249         CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
  250         cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
  251         cq->sw_queue[cq->sw_pidx] = cqe;
  252         t4_swcq_produce(cq);
  253 }
  254 
  255 static void advance_oldest_read(struct t4_wq *wq);
  256 
  257 int c4iw_flush_sq(struct c4iw_qp *qhp)
  258 {
  259         int flushed = 0;
  260         struct t4_wq *wq = &qhp->wq;
  261         struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
  262         struct t4_cq *cq = &chp->cq;
  263         int idx;
  264         struct t4_swsqe *swsqe;
  265 
  266         if (wq->sq.flush_cidx == -1)
  267                 wq->sq.flush_cidx = wq->sq.cidx;
  268         idx = wq->sq.flush_cidx;
  269         BUG_ON(idx >= wq->sq.size);
  270         while (idx != wq->sq.pidx) {
  271                 swsqe = &wq->sq.sw_sq[idx];
  272                 BUG_ON(swsqe->flushed);
  273                 swsqe->flushed = 1;
  274                 insert_sq_cqe(wq, cq, swsqe);
  275                 if (wq->sq.oldest_read == swsqe) {
  276                         BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
  277                         advance_oldest_read(wq);
  278                 }
  279                 flushed++;
  280                 if (++idx == wq->sq.size)
  281                         idx = 0;
  282         }
  283         wq->sq.flush_cidx += flushed;
  284         if (wq->sq.flush_cidx >= wq->sq.size)
  285                 wq->sq.flush_cidx -= wq->sq.size;
  286         return flushed;
  287 }
  288 
  289 static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
  290 {
  291         struct t4_swsqe *swsqe;
  292         int cidx;
  293 
  294         if (wq->sq.flush_cidx == -1)
  295                 wq->sq.flush_cidx = wq->sq.cidx;
  296         cidx = wq->sq.flush_cidx;
  297         BUG_ON(cidx > wq->sq.size);
  298 
  299         while (cidx != wq->sq.pidx) {
  300                 swsqe = &wq->sq.sw_sq[cidx];
  301                 if (!swsqe->signaled) {
  302                         if (++cidx == wq->sq.size)
  303                                 cidx = 0;
  304                 } else if (swsqe->complete) {
  305 
  306                         BUG_ON(swsqe->flushed);
  307 
  308                         /*
  309                          * Insert this completed cqe into the swcq.
  310                          */
  311                         CTR3(KTR_IW_CXGBE,
  312                                 "%s moving cqe into swcq sq idx %u cq idx %u\n",
  313                                 __func__, cidx, cq->sw_pidx);
  314                         swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
  315                         cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
  316                         t4_swcq_produce(cq);
  317                         swsqe->flushed = 1;
  318                         if (++cidx == wq->sq.size)
  319                                 cidx = 0;
  320                         wq->sq.flush_cidx = cidx;
  321                 } else
  322                         break;
  323         }
  324 }
  325 
  326 static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
  327                 struct t4_cqe *read_cqe)
  328 {
  329         read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
  330         read_cqe->len = htonl(wq->sq.oldest_read->read_len);
  331         read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
  332                         V_CQE_SWCQE(SW_CQE(hw_cqe)) |
  333                         V_CQE_OPCODE(FW_RI_READ_REQ) |
  334                         V_CQE_TYPE(1));
  335         read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
  336 }
  337 
  338 static void advance_oldest_read(struct t4_wq *wq)
  339 {
  340 
  341         u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
  342 
  343         if (rptr == wq->sq.size)
  344                 rptr = 0;
  345         while (rptr != wq->sq.pidx) {
  346                 wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
  347 
  348                 if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
  349                         return;
  350                 if (++rptr == wq->sq.size)
  351                         rptr = 0;
  352         }
  353         wq->sq.oldest_read = NULL;
  354 }
  355 
  356 /*
  357  * Move all CQEs from the HWCQ into the SWCQ.
  358  * Deal with out-of-order and/or completions that complete
  359  * prior unsignalled WRs.
  360  */
  361 void c4iw_flush_hw_cq(struct c4iw_cq *chp)
  362 {
  363         struct t4_cqe *hw_cqe, *swcqe, read_cqe;
  364         struct c4iw_qp *qhp;
  365         struct t4_swsqe *swsqe;
  366         int ret;
  367 
  368         CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, &chp->cq,
  369                         chp->cq.cqid);
  370         ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
  371 
  372         /*
  373          * This logic is similar to poll_cq(), but not quite the same
  374          * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
  375          * also do any translation magic that poll_cq() normally does.
  376          */
  377         while (!ret) {
  378                 qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));
  379 
  380                 /*
  381                  * drop CQEs with no associated QP
  382                  */
  383                 if (qhp == NULL)
  384                         goto next_cqe;
  385 
  386                 if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
  387                         goto next_cqe;
  388 
  389                 if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
  390 
  391                         /* If we have reached here because of async
  392                          * event or other error, and have egress error
  393                          * then drop
  394                          */
  395                         if (CQE_TYPE(hw_cqe) == 1)
  396                                 goto next_cqe;
  397 
  398                         /* drop peer2peer RTR reads.
  399                          */
  400                         if (CQE_WRID_STAG(hw_cqe) == 1)
  401                                 goto next_cqe;
  402 
  403                         /*
  404                          * Eat completions for unsignaled read WRs.
  405                          */
  406                         if (!qhp->wq.sq.oldest_read->signaled) {
  407                                 advance_oldest_read(&qhp->wq);
  408                                 goto next_cqe;
  409                         }
  410 
  411                         /*
  412                          * Don't write to the HWCQ, create a new read req CQE
  413                          * in local memory and move it into the swcq.
  414                          */
  415                         create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
  416                         hw_cqe = &read_cqe;
  417                         advance_oldest_read(&qhp->wq);
  418                 }
  419 
  420                 /* if its a SQ completion, then do the magic to move all the
  421                  * unsignaled and now in-order completions into the swcq.
  422                  */
  423                 if (SQ_TYPE(hw_cqe)) {
  424                         swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
  425                         swsqe->cqe = *hw_cqe;
  426                         swsqe->complete = 1;
  427                         flush_completed_wrs(&qhp->wq, &chp->cq);
  428                 } else {
  429                         swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
  430                         *swcqe = *hw_cqe;
  431                         swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
  432                         t4_swcq_produce(&chp->cq);
  433                 }
  434 next_cqe:
  435                 t4_hwcq_consume(&chp->cq);
  436                 ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
  437         }
  438 }
  439 
  440 static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
  441 {
  442         if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
  443                 return 0;
  444 
  445         if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
  446                 return 0;
  447 
  448         if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
  449                 return 0;
  450 
  451         if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
  452                 return 0;
  453         return 1;
  454 }
  455 
  456 void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
  457 {
  458         struct t4_cqe *cqe;
  459         u32 ptr;
  460 
  461         *count = 0;
  462         CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
  463         ptr = cq->sw_cidx;
  464         while (ptr != cq->sw_pidx) {
  465                 cqe = &cq->sw_queue[ptr];
  466                 if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
  467                     (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
  468                         (*count)++;
  469                 if (++ptr == cq->size)
  470                         ptr = 0;
  471         }
  472         CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
  473 }
  474 
  475 /*
  476  * poll_cq
  477  *
  478  * Caller must:
  479  *     check the validity of the first CQE,
  480  *     supply the wq assicated with the qpid.
  481  *
  482  * credit: cq credit to return to sge.
  483  * cqe_flushed: 1 iff the CQE is flushed.
  484  * cqe: copy of the polled CQE.
  485  *
  486  * return value:
  487  *    0             CQE returned ok.
  488  *    -EAGAIN       CQE skipped, try again.
  489  *    -EOVERFLOW    CQ overflow detected.
  490  */
  491 static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
  492                    u8 *cqe_flushed, u64 *cookie, u32 *credit)
  493 {
  494         int ret = 0;
  495         struct t4_cqe *hw_cqe, read_cqe;
  496 
  497         *cqe_flushed = 0;
  498         *credit = 0;
  499         ret = t4_next_cqe(cq, &hw_cqe);
  500         if (ret)
  501                 return ret;
  502 
  503         CTR6(KTR_IW_CXGBE,
  504             "%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x", __func__,
  505             CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe),
  506             CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe));
  507         CTR5(KTR_IW_CXGBE,
  508             "%s opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
  509             __func__, CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
  510             CQE_WRID_LOW(hw_cqe));
  511 
  512         /*
  513          * skip cqe's not affiliated with a QP.
  514          */
  515         if (wq == NULL) {
  516                 ret = -EAGAIN;
  517                 goto skip_cqe;
  518         }
  519 
  520         /*
  521         * skip hw cqe's if the wq is flushed.
  522         */
  523         if (wq->flushed && !SW_CQE(hw_cqe)) {
  524                 ret = -EAGAIN;
  525                 goto skip_cqe;
  526         }
  527 
  528         /*
  529          * skip TERMINATE cqes...
  530          */
  531         if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
  532                 ret = -EAGAIN;
  533                 goto skip_cqe;
  534         }
  535 
  536         /*
  537          * Special cqe for drain WR completions...
  538          */
  539         if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
  540                 *cookie = CQE_DRAIN_COOKIE(hw_cqe);
  541                 *cqe = *hw_cqe;
  542                 goto skip_cqe;
  543         }
  544 
  545         /*
  546          * Gotta tweak READ completions:
  547          *      1) the cqe doesn't contain the sq_wptr from the wr.
  548          *      2) opcode not reflected from the wr.
  549          *      3) read_len not reflected from the wr.
  550          *      4) cq_type is RQ_TYPE not SQ_TYPE.
  551          */
  552         if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
  553 
  554                 /* If we have reached here because of async
  555                  * event or other error, and have egress error
  556                  * then drop
  557                  */
  558                 if (CQE_TYPE(hw_cqe) == 1) {
  559                         if (CQE_STATUS(hw_cqe))
  560                                 t4_set_wq_in_error(wq);
  561                         ret = -EAGAIN;
  562                         goto skip_cqe;
  563                 }
  564 
  565                 /* If this is an unsolicited read response, then the read
  566                  * was generated by the kernel driver as part of peer-2-peer
  567                  * connection setup.  So ignore the completion.
  568                  */
  569                 if (CQE_WRID_STAG(hw_cqe) == 1) {
  570                         if (CQE_STATUS(hw_cqe))
  571                                 t4_set_wq_in_error(wq);
  572                         ret = -EAGAIN;
  573                         goto skip_cqe;
  574                 }
  575 
  576                 /*
  577                  * Eat completions for unsignaled read WRs.
  578                  */
  579                 if (!wq->sq.oldest_read->signaled) {
  580                         advance_oldest_read(wq);
  581                         ret = -EAGAIN;
  582                         goto skip_cqe;
  583                 }
  584 
  585                 /*
  586                  * Don't write to the HWCQ, so create a new read req CQE
  587                  * in local memory.
  588                  */
  589                 create_read_req_cqe(wq, hw_cqe, &read_cqe);
  590                 hw_cqe = &read_cqe;
  591                 advance_oldest_read(wq);
  592         }
  593 
  594         if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
  595                 *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
  596                 t4_set_wq_in_error(wq);
  597         }
  598 
  599         /*
  600          * RECV completion.
  601          */
  602         if (RQ_TYPE(hw_cqe)) {
  603 
  604                 /*
  605                  * HW only validates 4 bits of MSN.  So we must validate that
  606                  * the MSN in the SEND is the next expected MSN.  If its not,
  607                  * then we complete this with T4_ERR_MSN and mark the wq in
  608                  * error.
  609                  */
  610 
  611                 if (t4_rq_empty(wq)) {
  612                         t4_set_wq_in_error(wq);
  613                         ret = -EAGAIN;
  614                         goto skip_cqe;
  615                 }
  616                 if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
  617                         t4_set_wq_in_error(wq);
  618                         hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
  619                         goto proc_cqe;
  620                 }
  621                 goto proc_cqe;
  622         }
  623 
  624         /*
  625          * If we get here its a send completion.
  626          *
  627          * Handle out of order completion. These get stuffed
  628          * in the SW SQ. Then the SW SQ is walked to move any
  629          * now in-order completions into the SW CQ.  This handles
  630          * 2 cases:
  631          *      1) reaping unsignaled WRs when the first subsequent
  632          *         signaled WR is completed.
  633          *      2) out of order read completions.
  634          */
  635         if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
  636                 struct t4_swsqe *swsqe;
  637 
  638                 CTR2(KTR_IW_CXGBE,
  639                     "%s out of order completion going in sw_sq at idx %u",
  640                     __func__, CQE_WRID_SQ_IDX(hw_cqe));
  641                 swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
  642                 swsqe->cqe = *hw_cqe;
  643                 swsqe->complete = 1;
  644                 ret = -EAGAIN;
  645                 goto flush_wq;
  646         }
  647 
  648 proc_cqe:
  649         *cqe = *hw_cqe;
  650 
  651         /*
  652          * Reap the associated WR(s) that are freed up with this
  653          * completion.
  654          */
  655         if (SQ_TYPE(hw_cqe)) {
  656                 int idx = CQE_WRID_SQ_IDX(hw_cqe);
  657                 BUG_ON(idx >= wq->sq.size);
  658 
  659                 /*
  660                 * Account for any unsignaled completions completed by
  661                 * this signaled completion.  In this case, cidx points
  662                 * to the first unsignaled one, and idx points to the
  663                 * signaled one.  So adjust in_use based on this delta.
  664                 * if this is not completing any unsigned wrs, then the
  665                 * delta will be 0. Handle wrapping also!
  666                 */
  667                 if (idx < wq->sq.cidx)
  668                         wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
  669                 else
  670                         wq->sq.in_use -= idx - wq->sq.cidx;
  671                 BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
  672 
  673                 wq->sq.cidx = (uint16_t)idx;
  674                 CTR2(KTR_IW_CXGBE, "%s completing sq idx %u",
  675                                 __func__, wq->sq.cidx);
  676                 *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
  677                 t4_sq_consume(wq);
  678         } else {
  679                 CTR2(KTR_IW_CXGBE, "%s completing rq idx %u",
  680                      __func__, wq->rq.cidx);
  681                 *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
  682                 BUG_ON(t4_rq_empty(wq));
  683                 t4_rq_consume(wq);
  684                 goto skip_cqe;
  685         }
  686 
  687 flush_wq:
  688         /*
  689          * Flush any completed cqes that are now in-order.
  690          */
  691         flush_completed_wrs(wq, cq);
  692 
  693 skip_cqe:
  694         if (SW_CQE(hw_cqe)) {
  695                 CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip sw cqe cidx %u",
  696                      __func__, cq, cq->cqid, cq->sw_cidx);
  697                 t4_swcq_consume(cq);
  698         } else {
  699                 CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip hw cqe cidx %u",
  700                      __func__, cq, cq->cqid, cq->cidx);
  701                 t4_hwcq_consume(cq);
  702         }
  703         return ret;
  704 }
  705 
  706 /*
  707  * Get one cq entry from c4iw and map it to openib.
  708  *
  709  * Returns:
  710  *      0                       cqe returned
  711  *      -ENODATA                EMPTY;
  712  *      -EAGAIN                 caller must try again
  713  *      any other -errno        fatal error
  714  */
  715 static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
  716 {
  717         struct c4iw_qp *qhp = NULL;
  718         struct t4_cqe cqe = {0, 0}, *rd_cqe;
  719         struct t4_wq *wq;
  720         u32 credit = 0;
  721         u8 cqe_flushed;
  722         u64 cookie = 0;
  723         int ret;
  724 
  725         ret = t4_next_cqe(&chp->cq, &rd_cqe);
  726 
  727         if (ret)
  728                 return ret;
  729 
  730         qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
  731         if (!qhp)
  732                 wq = NULL;
  733         else {
  734                 spin_lock(&qhp->lock);
  735                 wq = &(qhp->wq);
  736         }
  737         ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
  738         if (ret)
  739                 goto out;
  740 
  741         wc->wr_id = cookie;
  742         wc->qp = &qhp->ibqp;
  743         wc->vendor_err = CQE_STATUS(&cqe);
  744         wc->wc_flags = 0;
  745 
  746         CTR5(KTR_IW_CXGBE, "%s qpid 0x%x type %d opcode %d status 0x%x",
  747             __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
  748             CQE_STATUS(&cqe));
  749         CTR5(KTR_IW_CXGBE, "%s len %u wrid hi 0x%x lo 0x%x cookie 0x%llx",
  750             __func__, CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
  751             (unsigned long long)cookie);
  752 
  753         if (CQE_TYPE(&cqe) == 0) {
  754                 if (!CQE_STATUS(&cqe))
  755                         wc->byte_len = CQE_LEN(&cqe);
  756                 else
  757                         wc->byte_len = 0;
  758                 wc->opcode = IB_WC_RECV;
  759                 if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
  760                     CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
  761                         wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
  762                         wc->wc_flags |= IB_WC_WITH_INVALIDATE;
  763                         c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
  764                 }
  765         } else {
  766                 switch (CQE_OPCODE(&cqe)) {
  767                 case FW_RI_RDMA_WRITE:
  768                         wc->opcode = IB_WC_RDMA_WRITE;
  769                         break;
  770                 case FW_RI_READ_REQ:
  771                         wc->opcode = IB_WC_RDMA_READ;
  772                         wc->byte_len = CQE_LEN(&cqe);
  773                         break;
  774                 case FW_RI_SEND_WITH_INV:
  775                 case FW_RI_SEND_WITH_SE_INV:
  776                         wc->opcode = IB_WC_SEND;
  777                         wc->wc_flags |= IB_WC_WITH_INVALIDATE;
  778                         break;
  779                 case FW_RI_SEND:
  780                 case FW_RI_SEND_WITH_SE:
  781                         wc->opcode = IB_WC_SEND;
  782                         break;
  783                 case FW_RI_LOCAL_INV:
  784                         wc->opcode = IB_WC_LOCAL_INV;
  785                         break;
  786                 case FW_RI_FAST_REGISTER:
  787                         wc->opcode = IB_WC_REG_MR;
  788 
  789                         /* Invalidate the MR if the fastreg failed */
  790                         if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
  791                                 c4iw_invalidate_mr(qhp->rhp,
  792                                                    CQE_WRID_FR_STAG(&cqe));
  793                         break;
  794                 case C4IW_DRAIN_OPCODE:
  795                         wc->opcode = IB_WC_SEND;
  796                         break;
  797                 default:
  798                         printf("Unexpected opcode %d "
  799                                "in the CQE received for QPID = 0x%0x\n",
  800                                CQE_OPCODE(&cqe), CQE_QPID(&cqe));
  801                         ret = -EINVAL;
  802                         goto out;
  803                 }
  804         }
  805 
  806         if (cqe_flushed)
  807                 wc->status = IB_WC_WR_FLUSH_ERR;
  808         else {
  809 
  810                 switch (CQE_STATUS(&cqe)) {
  811                 case T4_ERR_SUCCESS:
  812                         wc->status = IB_WC_SUCCESS;
  813                         break;
  814                 case T4_ERR_STAG:
  815                         wc->status = IB_WC_LOC_ACCESS_ERR;
  816                         break;
  817                 case T4_ERR_PDID:
  818                         wc->status = IB_WC_LOC_PROT_ERR;
  819                         break;
  820                 case T4_ERR_QPID:
  821                 case T4_ERR_ACCESS:
  822                         wc->status = IB_WC_LOC_ACCESS_ERR;
  823                         break;
  824                 case T4_ERR_WRAP:
  825                         wc->status = IB_WC_GENERAL_ERR;
  826                         break;
  827                 case T4_ERR_BOUND:
  828                         wc->status = IB_WC_LOC_LEN_ERR;
  829                         break;
  830                 case T4_ERR_INVALIDATE_SHARED_MR:
  831                 case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
  832                         wc->status = IB_WC_MW_BIND_ERR;
  833                         break;
  834                 case T4_ERR_CRC:
  835                 case T4_ERR_MARKER:
  836                 case T4_ERR_PDU_LEN_ERR:
  837                 case T4_ERR_OUT_OF_RQE:
  838                 case T4_ERR_DDP_VERSION:
  839                 case T4_ERR_RDMA_VERSION:
  840                 case T4_ERR_DDP_QUEUE_NUM:
  841                 case T4_ERR_MSN:
  842                 case T4_ERR_TBIT:
  843                 case T4_ERR_MO:
  844                 case T4_ERR_MSN_RANGE:
  845                 case T4_ERR_IRD_OVERFLOW:
  846                 case T4_ERR_OPCODE:
  847                 case T4_ERR_INTERNAL_ERR:
  848                         wc->status = IB_WC_FATAL_ERR;
  849                         break;
  850                 case T4_ERR_SWFLUSH:
  851                         wc->status = IB_WC_WR_FLUSH_ERR;
  852                         break;
  853                 default:
  854                         printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n",
  855                                CQE_STATUS(&cqe), CQE_QPID(&cqe));
  856                         wc->status = IB_WC_FATAL_ERR;
  857                 }
  858         }
  859 out:
  860         if (wq)
  861                 spin_unlock(&qhp->lock);
  862         return ret;
  863 }
  864 
  865 int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
  866 {
  867         struct c4iw_cq *chp;
  868         unsigned long flags;
  869         int npolled;
  870         int err = 0;
  871 
  872         chp = to_c4iw_cq(ibcq);
  873 
  874         spin_lock_irqsave(&chp->lock, flags);
  875         for (npolled = 0; npolled < num_entries; ++npolled) {
  876                 do {
  877                         err = c4iw_poll_cq_one(chp, wc + npolled);
  878                 } while (err == -EAGAIN);
  879                 if (err)
  880                         break;
  881         }
  882         spin_unlock_irqrestore(&chp->lock, flags);
  883         return !err || err == -ENODATA ? npolled : err;
  884 }
  885 
  886 void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
  887 {
  888         struct c4iw_cq *chp;
  889         struct c4iw_ucontext *ucontext;
  890 
  891         CTR2(KTR_IW_CXGBE, "%s ib_cq %p", __func__, ib_cq);
  892         chp = to_c4iw_cq(ib_cq);
  893 
  894         remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
  895         atomic_dec(&chp->refcnt);
  896         wait_event(chp->wait, !atomic_read(&chp->refcnt));
  897 
  898         ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
  899             ibucontext);
  900         destroy_cq(&chp->rhp->rdev, &chp->cq,
  901                    ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
  902 }
  903 
  904 int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
  905                    struct ib_udata *udata)
  906 {
  907         struct ib_device *ibdev = ibcq->device;
  908         int entries = attr->cqe;
  909         int vector = attr->comp_vector;
  910         struct c4iw_dev *rhp;
  911         struct c4iw_cq *chp = to_c4iw_cq(ibcq);
  912         struct c4iw_create_cq_resp uresp;
  913         struct c4iw_ucontext *ucontext = NULL;
  914         int ret;
  915         size_t memsize, hwentries;
  916         struct c4iw_mm_entry *mm, *mm2;
  917 
  918         CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);
  919         if (attr->flags)
  920                 return -EINVAL;
  921 
  922         rhp = to_c4iw_dev(ibdev);
  923 
  924         ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
  925             ibucontext);
  926 
  927         /* account for the status page. */
  928         entries++;
  929 
  930         /* IQ needs one extra entry to differentiate full vs empty. */
  931         entries++;
  932 
  933         /*
  934          * entries must be multiple of 16 for HW.
  935          */
  936         entries = roundup(entries, 16);
  937 
  938         /*
  939          * Make actual HW queue 2x to avoid cdix_inc overflows.
  940          */
  941         hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);
  942 
  943         /*
  944          * Make HW queue at least 64 entries so GTS updates aren't too
  945          * frequent.
  946          */
  947         if (hwentries < 64)
  948                 hwentries = 64;
  949 
  950         memsize = hwentries * sizeof *chp->cq.queue;
  951 
  952         /*
  953          * memsize must be a multiple of the page size if its a user cq.
  954          */
  955         if (ucontext)
  956                 memsize = roundup(memsize, PAGE_SIZE);
  957         chp->cq.size = hwentries;
  958         chp->cq.memsize = memsize;
  959         chp->cq.vector = vector;
  960 
  961         ret = create_cq(&rhp->rdev, &chp->cq,
  962                         ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
  963         if (ret)
  964                 goto err1;
  965 
  966         chp->rhp = rhp;
  967         chp->cq.size--;                         /* status page */
  968         chp->ibcq.cqe = entries - 2;
  969         spin_lock_init(&chp->lock);
  970         spin_lock_init(&chp->comp_handler_lock);
  971         atomic_set(&chp->refcnt, 1);
  972         init_waitqueue_head(&chp->wait);
  973         ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
  974         if (ret)
  975                 goto err2;
  976 
  977         if (ucontext) {
  978                 ret = -ENOMEM;
  979                 mm = kmalloc(sizeof *mm, GFP_KERNEL);
  980                 if (!mm)
  981                         goto err3;
  982                 mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
  983                 if (!mm2)
  984                         goto err4;
  985 
  986                 memset(&uresp, 0, sizeof(uresp));
  987                 uresp.qid_mask = rhp->rdev.cqmask;
  988                 uresp.cqid = chp->cq.cqid;
  989                 uresp.size = chp->cq.size;
  990                 uresp.memsize = chp->cq.memsize;
  991                 spin_lock(&ucontext->mmap_lock);
  992                 uresp.key = ucontext->key;
  993                 ucontext->key += PAGE_SIZE;
  994                 uresp.gts_key = ucontext->key;
  995                 ucontext->key += PAGE_SIZE;
  996                 spin_unlock(&ucontext->mmap_lock);
  997                 ret = ib_copy_to_udata(udata, &uresp,
  998                                         sizeof(uresp) - sizeof(uresp.reserved));
  999                 if (ret)
 1000                         goto err5;
 1001 
 1002                 mm->key = uresp.key;
 1003                 mm->addr = vtophys(chp->cq.queue);
 1004                 mm->len = chp->cq.memsize;
 1005                 insert_mmap(ucontext, mm);
 1006 
 1007                 mm2->key = uresp.gts_key;
 1008                 mm2->addr = chp->cq.bar2_pa;
 1009                 mm2->len = PAGE_SIZE;
 1010                 insert_mmap(ucontext, mm2);
 1011         }
 1012         CTR6(KTR_IW_CXGBE,
 1013             "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx",
 1014             __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
 1015             (unsigned long long) chp->cq.dma_addr);
 1016         return 0;
 1017 err5:
 1018         kfree(mm2);
 1019 err4:
 1020         kfree(mm);
 1021 err3:
 1022         remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
 1023 err2:
 1024         destroy_cq(&chp->rhp->rdev, &chp->cq,
 1025                    ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
 1026 err1:
 1027         return ret;
 1028 }
 1029 
 1030 int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
 1031 {
 1032         return -ENOSYS;
 1033 }
 1034 
 1035 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 1036 {
 1037         struct c4iw_cq *chp;
 1038         int ret = 0;
 1039         unsigned long flag;
 1040 
 1041         chp = to_c4iw_cq(ibcq);
 1042         spin_lock_irqsave(&chp->lock, flag);
 1043         t4_arm_cq(&chp->cq,
 1044                   (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
 1045         if (flags & IB_CQ_REPORT_MISSED_EVENTS)
 1046                 ret = t4_cq_notempty(&chp->cq);
 1047         spin_unlock_irqrestore(&chp->lock, flag);
 1048         return ret;
 1049 }
 1050 #endif
Cache object: b593d76e15ef5bc32390c1e284e255d7 
 
 |