The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/iser/iser_verbs.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $FreeBSD$ */
    2 /*-
    3  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include "icl_iser.h"
   28 
/* malloc(9) type that tags every allocation made in this file. */
static MALLOC_DEFINE(M_ISER_VERBS, "iser_verbs", "iser verbs backend");
/*
 * Once a single run of iser_cq_tasklet_fn() has handled at least this
 * many completions it stops polling its CQ.
 */
static int iser_cq_poll_limit = 512;
   31 
   32 static void
   33 iser_cq_event_callback(struct ib_event *cause, void *context)
   34 {
   35         ISER_ERR("got cq event %d", cause->event);
   36 }
   37 
   38 static void
   39 iser_qp_event_callback(struct ib_event *cause, void *context)
   40 {
   41         ISER_ERR("got qp event %d", cause->event);
   42 }
   43 
   44 static void
   45 iser_event_handler(struct ib_event_handler *handler,
   46                                 struct ib_event *event)
   47 {
   48         ISER_ERR("async event %d on device %s port %d",
   49                  event->event, event->device->name,
   50                  event->element.port_num);
   51 }
   52 
   53 /**
   54  * is_iser_tx_desc - Indicate if the completion wr_id
   55  *     is a TX descriptor or not.
   56  * @iser_conn: iser connection
   57  * @wr_id: completion WR identifier
   58  *
   59  * Since we cannot rely on wc opcode in FLUSH errors
   60  * we must work around it by checking if the wr_id address
   61  * falls in the iser connection rx_descs buffer. If so
   62  * it is an RX descriptor, otherwize it is a TX.
   63  */
   64 static inline bool
   65 is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
   66 {
   67         void *start = iser_conn->rx_descs;
   68         u64 len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
   69         void *end = (void *)((uintptr_t)start + (uintptr_t)len);
   70 
   71         if (start) {
   72                 if (wr_id >= start && wr_id < end)
   73                         return false;
   74         } else {
   75                 return ((uintptr_t)wr_id != (uintptr_t)iser_conn->login_resp_buf);
   76         }
   77 
   78         return true;
   79 }
   80 
   81 /**
   82  * iser_handle_comp_error() - Handle error completion
   83  * @ib_conn:   connection RDMA resources
   84  * @wc:        work completion
   85  *
   86  * Notes: Update post_recv_buf_count in case of recv error completion.
   87  *        For non-FLUSH error completion we should also notify iscsi layer that
   88  *        connection is failed (in case we passed bind stage).
   89  */
   90 static void
   91 iser_handle_comp_error(struct ib_conn *ib_conn,
   92                        struct ib_wc *wc)
   93 {
   94         void *wr_id = (void *)(uintptr_t)wc->wr_id;
   95         struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
   96                                                    ib_conn);
   97 
   98         if (is_iser_tx_desc(iser_conn, wr_id)) {
   99                 ISER_DBG("conn %p got send comp error", iser_conn);
  100         } else {
  101                 ISER_DBG("conn %p got recv comp error", iser_conn);
  102                 ib_conn->post_recv_buf_count--;
  103         }
  104         if (wc->status != IB_WC_WR_FLUSH_ERR)
  105                 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
  106 }
  107 
  108 /**
  109  * iser_handle_wc - handle a single work completion
  110  * @wc: work completion
  111  *
  112  * Soft-IRQ context, work completion can be either
  113  * SEND or RECV, and can turn out successful or
  114  * with error (or flush error).
  115  */
  116 static void iser_handle_wc(struct ib_wc *wc)
  117 {
  118         struct ib_conn *ib_conn;
  119         struct iser_tx_desc *tx_desc;
  120         struct iser_rx_desc *rx_desc;
  121 
  122         ib_conn = wc->qp->qp_context;
  123         if (likely(wc->status == IB_WC_SUCCESS)) {
  124                 if (wc->opcode == IB_WC_RECV) {
  125                         rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
  126                         iser_rcv_completion(rx_desc, wc->byte_len,
  127                                             ib_conn);
  128                 } else
  129                 if (wc->opcode == IB_WC_SEND) {
  130                         tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
  131                         iser_snd_completion(tx_desc, ib_conn);
  132                 } else {
  133                         ISER_ERR("Unknown wc opcode %d", wc->opcode);
  134                 }
  135         } else {
  136                 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
  137                                         ib_conn);
  138                 if (wc->status != IB_WC_WR_FLUSH_ERR) {
  139                         ISER_ERR("conn %p wr id %llx status %d vend_err %x",
  140                                  iser_conn, (unsigned long long)wc->wr_id,
  141                                  wc->status, wc->vendor_err);
  142                 } else {
  143                         ISER_DBG("flush error: conn %p wr id %llx",
  144                                  iser_conn, (unsigned long long)wc->wr_id);
  145                 }
  146 
  147                 if (wc->wr_id == ISER_BEACON_WRID) {
  148                         /* all flush errors were consumed */
  149                         mtx_lock(&ib_conn->beacon.flush_lock);
  150                         ISER_DBG("conn %p got ISER_BEACON_WRID", iser_conn);
  151                         cv_signal(&ib_conn->beacon.flush_cv);
  152                         mtx_unlock(&ib_conn->beacon.flush_lock);
  153                 } else {
  154                         iser_handle_comp_error(ib_conn, wc);
  155                 }
  156         }
  157 }
  158 
  159 static void
  160 iser_cq_tasklet_fn(void *data, int pending)
  161 {
  162         struct iser_comp *comp = (struct iser_comp *)data;
  163         struct ib_cq *cq = comp->cq;
  164         struct ib_wc *const wcs = comp->wcs;
  165         int completed = 0;
  166         int i;
  167         int n;
  168 
  169         while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
  170                 for (i = 0; i < n; i++)
  171                         iser_handle_wc(&wcs[i]);
  172 
  173                 completed += n;
  174                 if (completed >= iser_cq_poll_limit)
  175                         break;
  176         }
  177 
  178         /*
  179          * It is assumed here that arming CQ only once its empty
  180          * would not cause interrupts to be missed.
  181          */
  182         ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
  183 }
  184 
  185 static void
  186 iser_cq_callback(struct ib_cq *cq, void *cq_context)
  187 {
  188         struct iser_comp *comp = cq_context;
  189 
  190         taskqueue_enqueue(comp->tq, &comp->task);
  191 }
  192 
  193 /**
  194  * iser_create_device_ib_res - creates Protection Domain (PD), Completion
  195  * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
  196  * the adapator.
  197  *
  198  * returns 0 on success, -1 on failure
  199  */
  200 static int
  201 iser_create_device_ib_res(struct iser_device *device)
  202 {
  203         struct ib_device *ib_dev = device->ib_device;
  204         int i, max_cqe;
  205 
  206         if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
  207                 ISER_ERR("device %s doesn't support Fastreg, "
  208                          "can't register memory", device->ib_device->name);
  209                 return (1);
  210         }
  211 
  212         device->comps_used = min(mp_ncpus, device->ib_device->num_comp_vectors);
  213 
  214         device->comps = malloc(device->comps_used * sizeof(*device->comps),
  215                 M_ISER_VERBS, M_WAITOK | M_ZERO);
  216         if (!device->comps)
  217                 goto comps_err;
  218 
  219         max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
  220 
  221         ISER_DBG("using %d CQs, device %s supports %d vectors max_cqe %d",
  222                  device->comps_used, device->ib_device->name,
  223                  device->ib_device->num_comp_vectors, max_cqe);
  224 
  225         device->pd = ib_alloc_pd(device->ib_device, IB_PD_UNSAFE_GLOBAL_RKEY);
  226         if (IS_ERR(device->pd))
  227                 goto pd_err;
  228 
  229         for (i = 0; i < device->comps_used; i++) {
  230                 struct iser_comp *comp = &device->comps[i];
  231                 struct ib_cq_init_attr cq_attr = {
  232                         .cqe            = max_cqe,
  233                         .comp_vector    = i,
  234                 };
  235 
  236                 comp->device = device;
  237                 comp->cq = ib_create_cq(device->ib_device,
  238                                         iser_cq_callback,
  239                                         iser_cq_event_callback,
  240                                         (void *)comp,
  241                                         &cq_attr);
  242                 if (IS_ERR(comp->cq)) {
  243                         comp->cq = NULL;
  244                         goto cq_err;
  245                 }
  246 
  247                 if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
  248                         goto cq_err;
  249 
  250                 TASK_INIT(&comp->task, 0, iser_cq_tasklet_fn, comp);
  251                 comp->tq = taskqueue_create_fast("iser_taskq", M_NOWAIT,
  252                                 taskqueue_thread_enqueue, &comp->tq);
  253                 if (!comp->tq)
  254                         goto tq_err;
  255                 taskqueue_start_threads(&comp->tq, 1, PI_NET, "iser taskq");
  256         }
  257 
  258         device->mr = device->pd->__internal_mr;
  259         if (IS_ERR(device->mr))
  260                 goto tq_err;
  261 
  262         INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
  263                                 iser_event_handler);
  264         if (ib_register_event_handler(&device->event_handler))
  265                 goto tq_err;
  266 
  267         return (0);
  268 
  269 tq_err:
  270         for (i = 0; i < device->comps_used; i++) {
  271                 struct iser_comp *comp = &device->comps[i];
  272                 if (comp->tq)
  273                         taskqueue_free(comp->tq);
  274         }
  275 cq_err:
  276         for (i = 0; i < device->comps_used; i++) {
  277                 struct iser_comp *comp = &device->comps[i];
  278                 if (comp->cq)
  279                         ib_destroy_cq(comp->cq);
  280         }
  281         ib_dealloc_pd(device->pd);
  282 pd_err:
  283         free(device->comps, M_ISER_VERBS);
  284 comps_err:
  285         ISER_ERR("failed to allocate an IB resource");
  286         return (1);
  287 }
  288 
  289 /**
  290  * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
  291  * CQ and PD created with the device associated with the adapator.
  292  */
  293 static void
  294 iser_free_device_ib_res(struct iser_device *device)
  295 {
  296         int i;
  297 
  298         for (i = 0; i < device->comps_used; i++) {
  299                 struct iser_comp *comp = &device->comps[i];
  300 
  301                 taskqueue_free(comp->tq);
  302                 ib_destroy_cq(comp->cq);
  303                 comp->cq = NULL;
  304         }
  305 
  306         (void)ib_unregister_event_handler(&device->event_handler);
  307         (void)ib_dealloc_pd(device->pd);
  308 
  309         free(device->comps, M_ISER_VERBS);
  310         device->comps = NULL;
  311 
  312         device->mr = NULL;
  313         device->pd = NULL;
  314 }
  315 
  316 static int
  317 iser_alloc_reg_res(struct ib_device *ib_device,
  318                    struct ib_pd *pd,
  319                    struct iser_reg_resources *res)
  320 {
  321         int ret;
  322 
  323         res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, ISCSI_ISER_SG_TABLESIZE + 1);
  324         if (IS_ERR(res->mr)) {
  325                 ret = -PTR_ERR(res->mr);
  326                 ISER_ERR("Failed to allocate  fast reg mr err=%d", ret);
  327                 return (ret);
  328         }
  329         res->mr_valid = 1;
  330 
  331         return (0);
  332 }
  333 
  334 static void
  335 iser_free_reg_res(struct iser_reg_resources *rsc)
  336 {
  337         ib_dereg_mr(rsc->mr);
  338 }
  339 
  340 static struct fast_reg_descriptor *
  341 iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd)
  342 {
  343         struct fast_reg_descriptor *desc;
  344         int ret;
  345 
  346         desc = malloc(sizeof(*desc), M_ISER_VERBS, M_WAITOK | M_ZERO);
  347         if (!desc) {
  348                 ISER_ERR("Failed to allocate a new fastreg descriptor");
  349                 return (NULL);
  350         }
  351 
  352         ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc);
  353         if (ret) {
  354                 ISER_ERR("failed to allocate reg_resources");
  355                 goto err;
  356         }
  357 
  358         return (desc);
  359 err:
  360         free(desc, M_ISER_VERBS);
  361         return (NULL);
  362 }
  363 
  364 /**
  365  * iser_create_fmr_pool - Creates FMR pool and page_vector
  366  *
  367  * returns 0 on success, or errno code on failure
  368  */
  369 int
  370 iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
  371 {
  372         struct iser_device *device = ib_conn->device;
  373         struct fast_reg_descriptor *desc;
  374         int i;
  375 
  376         INIT_LIST_HEAD(&ib_conn->fastreg.pool);
  377         ib_conn->fastreg.pool_size = 0;
  378         for (i = 0; i < cmds_max; i++) {
  379                 desc = iser_create_fastreg_desc(device->ib_device, device->pd);
  380                 if (!desc) {
  381                         ISER_ERR("Failed to create fastreg descriptor");
  382                         goto err;
  383                 }
  384 
  385                 list_add_tail(&desc->list, &ib_conn->fastreg.pool);
  386                 ib_conn->fastreg.pool_size++;
  387         }
  388 
  389         return (0);
  390 
  391 err:
  392         iser_free_fastreg_pool(ib_conn);
  393         return (ENOMEM);
  394 }
  395 
  396 /**
  397  * iser_free_fmr_pool - releases the FMR pool and page vec
  398  */
  399 void
  400 iser_free_fastreg_pool(struct ib_conn *ib_conn)
  401 {
  402         struct fast_reg_descriptor *desc, *tmp;
  403         int i = 0;
  404 
  405         if (list_empty(&ib_conn->fastreg.pool))
  406                 return;
  407 
  408         ISER_DBG("freeing conn %p fr pool", ib_conn);
  409 
  410         list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
  411                 list_del(&desc->list);
  412                 iser_free_reg_res(&desc->rsc);
  413                 free(desc, M_ISER_VERBS);
  414                 ++i;
  415         }
  416 
  417         if (i < ib_conn->fastreg.pool_size)
  418                 ISER_WARN("pool still has %d regions registered",
  419                           ib_conn->fastreg.pool_size - i);
  420 }
  421 
  422 /**
  423  * iser_create_ib_conn_res - Queue-Pair (QP)
  424  *
  425  * returns 0 on success, 1 on failure
  426  */
  427 static int
  428 iser_create_ib_conn_res(struct ib_conn *ib_conn)
  429 {
  430         struct iser_conn *iser_conn;
  431         struct iser_device *device;
  432         struct ib_device_attr *dev_attr;
  433         struct ib_qp_init_attr init_attr;
  434         int index, min_index = 0;
  435         int ret = -ENOMEM;
  436 
  437         iser_conn = container_of(ib_conn, struct iser_conn, ib_conn);
  438         device = ib_conn->device;
  439         dev_attr = &device->dev_attr;
  440 
  441         mtx_lock(&ig.connlist_mutex);
  442         /* select the CQ with the minimal number of usages */
  443         for (index = 0; index < device->comps_used; index++) {
  444                 if (device->comps[index].active_qps <
  445                     device->comps[min_index].active_qps)
  446                         min_index = index;
  447         }
  448         ib_conn->comp = &device->comps[min_index];
  449         ib_conn->comp->active_qps++;
  450         mtx_unlock(&ig.connlist_mutex);
  451         ISER_INFO("cq index %d used for ib_conn %p", min_index, ib_conn);
  452 
  453         memset(&init_attr, 0, sizeof init_attr);
  454         init_attr.event_handler = iser_qp_event_callback;
  455         init_attr.qp_context    = (void *)ib_conn;
  456         init_attr.send_cq       = ib_conn->comp->cq;
  457         init_attr.recv_cq       = ib_conn->comp->cq;
  458         init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
  459         init_attr.cap.max_send_sge = 2;
  460         init_attr.cap.max_recv_sge = 1;
  461         init_attr.sq_sig_type   = IB_SIGNAL_REQ_WR;
  462         init_attr.qp_type       = IB_QPT_RC;
  463 
  464         if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
  465                 init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
  466                 iser_conn->max_cmds =
  467                         ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
  468         } else {
  469                 init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
  470                 iser_conn->max_cmds =
  471                         ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
  472         }
  473         ISER_DBG("device %s supports max_send_wr %d",
  474                  device->ib_device->name, dev_attr->max_qp_wr);
  475 
  476         ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
  477         if (ret)
  478                 goto out_err;
  479 
  480         ib_conn->qp = ib_conn->cma_id->qp;
  481         ISER_DBG("setting conn %p cma_id %p qp %p",
  482                  ib_conn, ib_conn->cma_id,
  483                  ib_conn->cma_id->qp);
  484 
  485         return (ret);
  486 
  487 out_err:
  488         mtx_lock(&ig.connlist_mutex);
  489         ib_conn->comp->active_qps--;
  490         mtx_unlock(&ig.connlist_mutex);
  491         ISER_ERR("unable to alloc mem or create resource, err %d", ret);
  492 
  493         return (ret);
  494 }
  495 
  496 /**
  497  * based on the resolved device node GUID see if there already allocated
  498  * device for this device. If there's no such, create one.
  499  */
  500 static struct iser_device *
  501 iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
  502 {
  503         struct iser_device *device;
  504 
  505         sx_xlock(&ig.device_list_mutex);
  506 
  507         list_for_each_entry(device, &ig.device_list, ig_list)
  508                 /* find if there's a match using the node GUID */
  509                 if (device->ib_device->node_guid == cma_id->device->node_guid)
  510                         goto inc_refcnt;
  511 
  512         device = malloc(sizeof *device, M_ISER_VERBS, M_WAITOK | M_ZERO);
  513         if (device == NULL)
  514                 goto out;
  515 
  516         /* assign this device to the device */
  517         device->ib_device = cma_id->device;
  518         /* init the device and link it into ig device list */
  519         if (iser_create_device_ib_res(device)) {
  520                 free(device, M_ISER_VERBS);
  521                 device = NULL;
  522                 goto out;
  523         }
  524         list_add(&device->ig_list, &ig.device_list);
  525 
  526 inc_refcnt:
  527         device->refcount++;
  528         ISER_INFO("device %p refcount %d", device, device->refcount);
  529 out:
  530         sx_xunlock(&ig.device_list_mutex);
  531         return (device);
  532 }
  533 
  534 /* if there's no demand for this device, release it */
  535 static void
  536 iser_device_try_release(struct iser_device *device)
  537 {
  538         sx_xlock(&ig.device_list_mutex);
  539         device->refcount--;
  540         ISER_INFO("device %p refcount %d", device, device->refcount);
  541         if (!device->refcount) {
  542                 iser_free_device_ib_res(device);
  543                 list_del(&device->ig_list);
  544                 free(device, M_ISER_VERBS);
  545                 device = NULL;
  546         }
  547         sx_xunlock(&ig.device_list_mutex);
  548 }
  549 
  550 /**
  551  * Called with state mutex held
  552  **/
  553 static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
  554                                      enum iser_conn_state comp,
  555                                      enum iser_conn_state exch)
  556 {
  557         int ret;
  558 
  559         ret = (iser_conn->state == comp);
  560         if (ret)
  561                 iser_conn->state = exch;
  562 
  563         return ret;
  564 }
  565 
  566 /**
  567  * iser_free_ib_conn_res - release IB related resources
  568  * @iser_conn: iser connection struct
  569  * @destroy: indicator if we need to try to release the
  570  *     iser device and memory regoins pool (only iscsi
  571  *     shutdown and DEVICE_REMOVAL will use this).
  572  *
  573  * This routine is called with the iser state mutex held
  574  * so the cm_id removal is out of here. It is Safe to
  575  * be invoked multiple times.
  576  */
  577 void
  578 iser_free_ib_conn_res(struct iser_conn *iser_conn,
  579                                   bool destroy)
  580 {
  581         struct ib_conn *ib_conn = &iser_conn->ib_conn;
  582         struct iser_device *device = ib_conn->device;
  583 
  584         ISER_INFO("freeing conn %p cma_id %p qp %p",
  585                   iser_conn, ib_conn->cma_id, ib_conn->qp);
  586 
  587         if (ib_conn->qp != NULL) {
  588                 mtx_lock(&ig.connlist_mutex);
  589                 ib_conn->comp->active_qps--;
  590                 mtx_unlock(&ig.connlist_mutex);
  591                 rdma_destroy_qp(ib_conn->cma_id);
  592                 ib_conn->qp = NULL;
  593         }
  594 
  595         if (destroy) {
  596                 if (iser_conn->login_buf)
  597                         iser_free_login_buf(iser_conn);
  598 
  599                 if (iser_conn->rx_descs)
  600                         iser_free_rx_descriptors(iser_conn);
  601 
  602                 if (device != NULL) {
  603                         iser_device_try_release(device);
  604                         ib_conn->device = NULL;
  605                 }
  606         }
  607 }
  608 
  609 /**
  610  * triggers start of the disconnect procedures and wait for them to be done
  611  * Called with state mutex held
  612  */
  613 int
  614 iser_conn_terminate(struct iser_conn *iser_conn)
  615 {
  616         struct ib_conn *ib_conn = &iser_conn->ib_conn;
  617         const struct ib_send_wr *bad_send_wr;
  618         const struct ib_recv_wr *bad_recv_wr;
  619         int err = 0;
  620 
  621         /* terminate the iser conn only if the conn state is UP */
  622         if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
  623                                            ISER_CONN_TERMINATING))
  624                 return (0);
  625 
  626         ISER_INFO("iser_conn %p state %d\n", iser_conn, iser_conn->state);
  627 
  628         if (ib_conn->qp == NULL) {
  629                 /* HOW can this be??? */
  630                 ISER_WARN("qp wasn't created");
  631                 return (1);
  632         }
  633 
  634         /*
  635          * Todo: This is a temporary workaround.
  636          * We serialize the connection closure using global lock in order to
  637          * receive all posted beacons completions.
  638          * Without Serialization, in case we open many connections (QPs) on
  639          * the same CQ, we might miss beacons because of missing interrupts.
  640          */
  641         sx_xlock(&ig.close_conns_mutex);
  642 
  643         /*
  644          * In case we didn't already clean up the cma_id (peer initiated
  645          * a disconnection), we need to Cause the CMA to change the QP
  646          * state to ERROR.
  647          */
  648         if (ib_conn->cma_id) {
  649                 err = rdma_disconnect(ib_conn->cma_id);
  650                 if (err)
  651                         ISER_ERR("Failed to disconnect, conn: 0x%p err %d",
  652                                 iser_conn, err);
  653 
  654                 mtx_lock(&ib_conn->beacon.flush_lock);
  655                 memset(&ib_conn->beacon.send, 0, sizeof(struct ib_send_wr));
  656                 ib_conn->beacon.send.wr_id = ISER_BEACON_WRID;
  657                 ib_conn->beacon.send.opcode = IB_WR_SEND;
  658                 /* post an indication that all send flush errors were consumed */
  659                 err = ib_post_send(ib_conn->qp, &ib_conn->beacon.send, &bad_send_wr);
  660                 if (err) {
  661                         ISER_ERR("conn %p failed to post send_beacon", ib_conn);
  662                         mtx_unlock(&ib_conn->beacon.flush_lock);
  663                         goto out;
  664                 }
  665 
  666                 ISER_DBG("before send cv_wait: %p", iser_conn);
  667                 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
  668                 ISER_DBG("after send cv_wait: %p", iser_conn);
  669 
  670                 memset(&ib_conn->beacon.recv, 0, sizeof(struct ib_recv_wr));
  671                 ib_conn->beacon.recv.wr_id = ISER_BEACON_WRID;
  672                 /* post an indication that all recv flush errors were consumed */
  673                 err = ib_post_recv(ib_conn->qp, &ib_conn->beacon.recv, &bad_recv_wr);
  674                 if (err) {
  675                         ISER_ERR("conn %p failed to post recv_beacon", ib_conn);
  676                         mtx_unlock(&ib_conn->beacon.flush_lock);
  677                         goto out;
  678                 }
  679 
  680                 ISER_DBG("before recv cv_wait: %p", iser_conn);
  681                 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
  682                 mtx_unlock(&ib_conn->beacon.flush_lock);
  683                 ISER_DBG("after recv cv_wait: %p", iser_conn);
  684         }
  685 out:
  686         sx_xunlock(&ig.close_conns_mutex);
  687         return (1);
  688 }
  689 
  690 /**
  691  * Called with state mutex held
  692  **/
  693 static void
  694 iser_connect_error(struct rdma_cm_id *cma_id)
  695 {
  696         struct iser_conn *iser_conn;
  697 
  698         iser_conn = cma_id->context;
  699 
  700         ISER_ERR("conn %p", iser_conn);
  701 
  702         iser_conn->state = ISER_CONN_TERMINATING;
  703 
  704         cv_signal(&iser_conn->up_cv);
  705 }
  706 
  707 /**
  708  * Called with state mutex held
  709  **/
  710 static void
  711 iser_addr_handler(struct rdma_cm_id *cma_id)
  712 {
  713         struct iser_device *device;
  714         struct iser_conn   *iser_conn;
  715         struct ib_conn   *ib_conn;
  716         int    ret;
  717 
  718         iser_conn = cma_id->context;
  719 
  720         ib_conn = &iser_conn->ib_conn;
  721         device = iser_device_find_by_ib_device(cma_id);
  722         if (!device) {
  723                 ISER_ERR("conn %p device lookup/creation failed",
  724                          iser_conn);
  725                 iser_connect_error(cma_id);
  726                 return;
  727         }
  728 
  729         ib_conn->device = device;
  730 
  731         ret = rdma_resolve_route(cma_id, 1000);
  732         if (ret) {
  733                 ISER_ERR("conn %p resolve route failed: %d", iser_conn, ret);
  734                 iser_connect_error(cma_id);
  735                 return;
  736         }
  737 }
  738 
  739 /**
  740  * Called with state mutex held
  741  **/
  742 static void
  743 iser_route_handler(struct rdma_cm_id *cma_id)
  744 {
  745         struct rdma_conn_param conn_param;
  746         int    ret;
  747         struct iser_cm_hdr req_hdr;
  748         struct iser_conn *iser_conn = cma_id->context;
  749         struct ib_conn *ib_conn = &iser_conn->ib_conn;
  750         struct iser_device *device = ib_conn->device;
  751 
  752         ret = iser_create_ib_conn_res(ib_conn);
  753         if (ret)
  754                 goto failure;
  755 
  756         memset(&conn_param, 0, sizeof conn_param);
  757         conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
  758         conn_param.retry_count         = 7;
  759         conn_param.rnr_retry_count     = 6;
  760         /*
  761          * Initiaotr depth should not be set, but in order to compat
  762          * with old targets, we keep this value set.
  763          */
  764         conn_param.initiator_depth     = 1;
  765 
  766         memset(&req_hdr, 0, sizeof(req_hdr));
  767         req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
  768                         ISER_SEND_W_INV_NOT_SUPPORTED);
  769         conn_param.private_data         = (void *)&req_hdr;
  770         conn_param.private_data_len     = sizeof(struct iser_cm_hdr);
  771 
  772         ret = rdma_connect(cma_id, &conn_param);
  773         if (ret) {
  774                 ISER_ERR("conn %p failure connecting: %d", iser_conn, ret);
  775                 goto failure;
  776         }
  777 
  778         return;
  779 failure:
  780         iser_connect_error(cma_id);
  781 }
  782 
  783 /**
  784  * Called with state mutex held
  785  **/
  786 static void
  787 iser_connected_handler(struct rdma_cm_id *cma_id)
  788 {
  789         struct iser_conn *iser_conn;
  790         struct ib_qp_attr attr;
  791         struct ib_qp_init_attr init_attr;
  792 
  793         iser_conn = cma_id->context;
  794 
  795         (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
  796 
  797         ISER_INFO("remote qpn:%x my qpn:%x",
  798                   attr.dest_qp_num, cma_id->qp->qp_num);
  799 
  800         iser_conn->state = ISER_CONN_UP;
  801 
  802         cv_signal(&iser_conn->up_cv);
  803 }
  804 
  805 /**
  806  * Called with state mutex held
  807  **/
  808 static void
  809 iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy)
  810 {
  811         struct iser_conn *iser_conn = cma_id->context;
  812 
  813         if (iser_conn_terminate(iser_conn))
  814                 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
  815 
  816 }
  817 
  818 int
  819 iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
  820 {
  821         struct iser_conn *iser_conn;
  822         int ret = 0;
  823 
  824         iser_conn = cma_id->context;
  825         ISER_INFO("event %d status %d conn %p id %p",
  826                   event->event, event->status, cma_id->context, cma_id);
  827 
  828         sx_xlock(&iser_conn->state_mutex);
  829         switch (event->event) {
  830         case RDMA_CM_EVENT_ADDR_RESOLVED:
  831                 iser_addr_handler(cma_id);
  832                 break;
  833         case RDMA_CM_EVENT_ROUTE_RESOLVED:
  834                 iser_route_handler(cma_id);
  835                 break;
  836         case RDMA_CM_EVENT_ESTABLISHED:
  837                 iser_connected_handler(cma_id);
  838                 break;
  839         case RDMA_CM_EVENT_ADDR_ERROR:
  840         case RDMA_CM_EVENT_ROUTE_ERROR:
  841         case RDMA_CM_EVENT_CONNECT_ERROR:
  842         case RDMA_CM_EVENT_UNREACHABLE:
  843         case RDMA_CM_EVENT_REJECTED:
  844                 iser_connect_error(cma_id);
  845                 break;
  846         case RDMA_CM_EVENT_DISCONNECTED:
  847         case RDMA_CM_EVENT_ADDR_CHANGE:
  848         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
  849                 iser_cleanup_handler(cma_id, false);
  850                 break;
  851         default:
  852                 ISER_ERR("Unexpected RDMA CM event (%d)", event->event);
  853                 break;
  854         }
  855         sx_xunlock(&iser_conn->state_mutex);
  856 
  857         return (ret);
  858 }
  859 
  860 int
  861 iser_post_recvl(struct iser_conn *iser_conn)
  862 {
  863         const struct ib_recv_wr *rx_wr_failed;
  864         struct ib_recv_wr rx_wr;
  865         struct ib_conn *ib_conn = &iser_conn->ib_conn;
  866         struct ib_sge     sge;
  867         int ib_ret;
  868 
  869         sge.addr   = iser_conn->login_resp_dma;
  870         sge.length = ISER_RX_LOGIN_SIZE;
  871         sge.lkey   = ib_conn->device->mr->lkey;
  872 
  873         rx_wr.wr_id   = (uintptr_t)iser_conn->login_resp_buf;
  874         rx_wr.sg_list = &sge;
  875         rx_wr.num_sge = 1;
  876         rx_wr.next    = NULL;
  877 
  878         ib_conn->post_recv_buf_count++;
  879         ib_ret  = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
  880         if (ib_ret) {
  881                 ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
  882                 ib_conn->post_recv_buf_count--;
  883         }
  884 
  885         return (ib_ret);
  886 }
  887 
  888 int
  889 iser_post_recvm(struct iser_conn *iser_conn, int count)
  890 {
  891         const struct ib_recv_wr *rx_wr_failed;
  892         struct ib_recv_wr *rx_wr;
  893         int i, ib_ret;
  894         struct ib_conn *ib_conn = &iser_conn->ib_conn;
  895         unsigned int my_rx_head = iser_conn->rx_desc_head;
  896         struct iser_rx_desc *rx_desc;
  897 
  898         for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
  899                 rx_desc         = &iser_conn->rx_descs[my_rx_head];
  900                 rx_wr->wr_id    = (uintptr_t)rx_desc;
  901                 rx_wr->sg_list  = &rx_desc->rx_sg;
  902                 rx_wr->num_sge  = 1;
  903                 rx_wr->next     = rx_wr + 1;
  904                 my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos;
  905         }
  906 
  907         rx_wr--;
  908         rx_wr->next = NULL; /* mark end of work requests list */
  909 
  910         ib_conn->post_recv_buf_count += count;
  911         ib_ret  = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
  912         if (ib_ret) {
  913                 ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
  914                 ib_conn->post_recv_buf_count -= count;
  915         } else
  916                 iser_conn->rx_desc_head = my_rx_head;
  917 
  918         return (ib_ret);
  919 }
  920 
  921 /**
  922  * iser_start_send - Initiate a Send DTO operation
  923  *
  924  * returns 0 on success, -1 on failure
  925  */
  926 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
  927                    bool signal)
  928 {
  929         int               ib_ret;
  930         const struct ib_send_wr *send_wr_failed;
  931         struct ib_send_wr send_wr;
  932 
  933         ib_dma_sync_single_for_device(ib_conn->device->ib_device,
  934                                       tx_desc->dma_addr, ISER_HEADERS_LEN,
  935                                       DMA_TO_DEVICE);
  936 
  937         send_wr.next       = NULL;
  938         send_wr.wr_id      = (uintptr_t)tx_desc;
  939         send_wr.sg_list    = tx_desc->tx_sg;
  940         send_wr.num_sge    = tx_desc->num_sge;
  941         send_wr.opcode     = IB_WR_SEND;
  942         send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
  943 
  944         ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
  945         if (ib_ret)
  946                 ISER_ERR("ib_post_send failed, ret:%d", ib_ret);
  947 
  948         return (ib_ret);
  949 }

Cache object: 8ccc78dbbcdae270bd903b16d4305c07


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.