1 /* $FreeBSD$ */
2 /*-
3 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include "icl_iser.h"
28
29 static MALLOC_DEFINE(M_ISER_VERBS, "iser_verbs", "iser verbs backend");
30 static int iser_cq_poll_limit = 512;
31
32 static void
33 iser_cq_event_callback(struct ib_event *cause, void *context)
34 {
35 ISER_ERR("got cq event %d", cause->event);
36 }
37
38 static void
39 iser_qp_event_callback(struct ib_event *cause, void *context)
40 {
41 ISER_ERR("got qp event %d", cause->event);
42 }
43
44 static void
45 iser_event_handler(struct ib_event_handler *handler,
46 struct ib_event *event)
47 {
48 ISER_ERR("async event %d on device %s port %d",
49 event->event, event->device->name,
50 event->element.port_num);
51 }
52
53 /**
54 * is_iser_tx_desc - Indicate if the completion wr_id
55 * is a TX descriptor or not.
56 * @iser_conn: iser connection
57 * @wr_id: completion WR identifier
58 *
59 * Since we cannot rely on wc opcode in FLUSH errors
60 * we must work around it by checking if the wr_id address
61 * falls in the iser connection rx_descs buffer. If so
62 * it is an RX descriptor, otherwize it is a TX.
63 */
64 static inline bool
65 is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
66 {
67 void *start = iser_conn->rx_descs;
68 u64 len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
69 void *end = (void *)((uintptr_t)start + (uintptr_t)len);
70
71 if (start) {
72 if (wr_id >= start && wr_id < end)
73 return false;
74 } else {
75 return ((uintptr_t)wr_id != (uintptr_t)iser_conn->login_resp_buf);
76 }
77
78 return true;
79 }
80
81 /**
82 * iser_handle_comp_error() - Handle error completion
83 * @ib_conn: connection RDMA resources
84 * @wc: work completion
85 *
86 * Notes: Update post_recv_buf_count in case of recv error completion.
87 * For non-FLUSH error completion we should also notify iscsi layer that
88 * connection is failed (in case we passed bind stage).
89 */
90 static void
91 iser_handle_comp_error(struct ib_conn *ib_conn,
92 struct ib_wc *wc)
93 {
94 void *wr_id = (void *)(uintptr_t)wc->wr_id;
95 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
96 ib_conn);
97
98 if (is_iser_tx_desc(iser_conn, wr_id)) {
99 ISER_DBG("conn %p got send comp error", iser_conn);
100 } else {
101 ISER_DBG("conn %p got recv comp error", iser_conn);
102 ib_conn->post_recv_buf_count--;
103 }
104 if (wc->status != IB_WC_WR_FLUSH_ERR)
105 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
106 }
107
108 /**
109 * iser_handle_wc - handle a single work completion
110 * @wc: work completion
111 *
112 * Soft-IRQ context, work completion can be either
113 * SEND or RECV, and can turn out successful or
114 * with error (or flush error).
115 */
116 static void iser_handle_wc(struct ib_wc *wc)
117 {
118 struct ib_conn *ib_conn;
119 struct iser_tx_desc *tx_desc;
120 struct iser_rx_desc *rx_desc;
121
122 ib_conn = wc->qp->qp_context;
123 if (likely(wc->status == IB_WC_SUCCESS)) {
124 if (wc->opcode == IB_WC_RECV) {
125 rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
126 iser_rcv_completion(rx_desc, wc->byte_len,
127 ib_conn);
128 } else
129 if (wc->opcode == IB_WC_SEND) {
130 tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
131 iser_snd_completion(tx_desc, ib_conn);
132 } else {
133 ISER_ERR("Unknown wc opcode %d", wc->opcode);
134 }
135 } else {
136 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
137 ib_conn);
138 if (wc->status != IB_WC_WR_FLUSH_ERR) {
139 ISER_ERR("conn %p wr id %llx status %d vend_err %x",
140 iser_conn, (unsigned long long)wc->wr_id,
141 wc->status, wc->vendor_err);
142 } else {
143 ISER_DBG("flush error: conn %p wr id %llx",
144 iser_conn, (unsigned long long)wc->wr_id);
145 }
146
147 if (wc->wr_id == ISER_BEACON_WRID) {
148 /* all flush errors were consumed */
149 mtx_lock(&ib_conn->beacon.flush_lock);
150 ISER_DBG("conn %p got ISER_BEACON_WRID", iser_conn);
151 cv_signal(&ib_conn->beacon.flush_cv);
152 mtx_unlock(&ib_conn->beacon.flush_lock);
153 } else {
154 iser_handle_comp_error(ib_conn, wc);
155 }
156 }
157 }
158
159 static void
160 iser_cq_tasklet_fn(void *data, int pending)
161 {
162 struct iser_comp *comp = (struct iser_comp *)data;
163 struct ib_cq *cq = comp->cq;
164 struct ib_wc *const wcs = comp->wcs;
165 int completed = 0;
166 int i;
167 int n;
168
169 while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
170 for (i = 0; i < n; i++)
171 iser_handle_wc(&wcs[i]);
172
173 completed += n;
174 if (completed >= iser_cq_poll_limit)
175 break;
176 }
177
178 /*
179 * It is assumed here that arming CQ only once its empty
180 * would not cause interrupts to be missed.
181 */
182 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
183 }
184
185 static void
186 iser_cq_callback(struct ib_cq *cq, void *cq_context)
187 {
188 struct iser_comp *comp = cq_context;
189
190 taskqueue_enqueue(comp->tq, &comp->task);
191 }
192
193 /**
194 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
195 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
196 * the adapator.
197 *
198 * returns 0 on success, -1 on failure
199 */
200 static int
201 iser_create_device_ib_res(struct iser_device *device)
202 {
203 struct ib_device *ib_dev = device->ib_device;
204 int i, max_cqe;
205
206 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
207 ISER_ERR("device %s doesn't support Fastreg, "
208 "can't register memory", device->ib_device->name);
209 return (1);
210 }
211
212 device->comps_used = min(mp_ncpus, device->ib_device->num_comp_vectors);
213
214 device->comps = malloc(device->comps_used * sizeof(*device->comps),
215 M_ISER_VERBS, M_WAITOK | M_ZERO);
216 if (!device->comps)
217 goto comps_err;
218
219 max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
220
221 ISER_DBG("using %d CQs, device %s supports %d vectors max_cqe %d",
222 device->comps_used, device->ib_device->name,
223 device->ib_device->num_comp_vectors, max_cqe);
224
225 device->pd = ib_alloc_pd(device->ib_device, IB_PD_UNSAFE_GLOBAL_RKEY);
226 if (IS_ERR(device->pd))
227 goto pd_err;
228
229 for (i = 0; i < device->comps_used; i++) {
230 struct iser_comp *comp = &device->comps[i];
231 struct ib_cq_init_attr cq_attr = {
232 .cqe = max_cqe,
233 .comp_vector = i,
234 };
235
236 comp->device = device;
237 comp->cq = ib_create_cq(device->ib_device,
238 iser_cq_callback,
239 iser_cq_event_callback,
240 (void *)comp,
241 &cq_attr);
242 if (IS_ERR(comp->cq)) {
243 comp->cq = NULL;
244 goto cq_err;
245 }
246
247 if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
248 goto cq_err;
249
250 TASK_INIT(&comp->task, 0, iser_cq_tasklet_fn, comp);
251 comp->tq = taskqueue_create_fast("iser_taskq", M_NOWAIT,
252 taskqueue_thread_enqueue, &comp->tq);
253 if (!comp->tq)
254 goto tq_err;
255 taskqueue_start_threads(&comp->tq, 1, PI_NET, "iser taskq");
256 }
257
258 device->mr = device->pd->__internal_mr;
259 if (IS_ERR(device->mr))
260 goto tq_err;
261
262 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
263 iser_event_handler);
264 if (ib_register_event_handler(&device->event_handler))
265 goto tq_err;
266
267 return (0);
268
269 tq_err:
270 for (i = 0; i < device->comps_used; i++) {
271 struct iser_comp *comp = &device->comps[i];
272 if (comp->tq)
273 taskqueue_free(comp->tq);
274 }
275 cq_err:
276 for (i = 0; i < device->comps_used; i++) {
277 struct iser_comp *comp = &device->comps[i];
278 if (comp->cq)
279 ib_destroy_cq(comp->cq);
280 }
281 ib_dealloc_pd(device->pd);
282 pd_err:
283 free(device->comps, M_ISER_VERBS);
284 comps_err:
285 ISER_ERR("failed to allocate an IB resource");
286 return (1);
287 }
288
289 /**
290 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
291 * CQ and PD created with the device associated with the adapator.
292 */
293 static void
294 iser_free_device_ib_res(struct iser_device *device)
295 {
296 int i;
297
298 for (i = 0; i < device->comps_used; i++) {
299 struct iser_comp *comp = &device->comps[i];
300
301 taskqueue_free(comp->tq);
302 ib_destroy_cq(comp->cq);
303 comp->cq = NULL;
304 }
305
306 (void)ib_unregister_event_handler(&device->event_handler);
307 (void)ib_dealloc_pd(device->pd);
308
309 free(device->comps, M_ISER_VERBS);
310 device->comps = NULL;
311
312 device->mr = NULL;
313 device->pd = NULL;
314 }
315
316 static int
317 iser_alloc_reg_res(struct ib_device *ib_device,
318 struct ib_pd *pd,
319 struct iser_reg_resources *res)
320 {
321 int ret;
322
323 res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, ISCSI_ISER_SG_TABLESIZE + 1);
324 if (IS_ERR(res->mr)) {
325 ret = -PTR_ERR(res->mr);
326 ISER_ERR("Failed to allocate fast reg mr err=%d", ret);
327 return (ret);
328 }
329 res->mr_valid = 1;
330
331 return (0);
332 }
333
334 static void
335 iser_free_reg_res(struct iser_reg_resources *rsc)
336 {
337 ib_dereg_mr(rsc->mr);
338 }
339
340 static struct fast_reg_descriptor *
341 iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd)
342 {
343 struct fast_reg_descriptor *desc;
344 int ret;
345
346 desc = malloc(sizeof(*desc), M_ISER_VERBS, M_WAITOK | M_ZERO);
347 if (!desc) {
348 ISER_ERR("Failed to allocate a new fastreg descriptor");
349 return (NULL);
350 }
351
352 ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc);
353 if (ret) {
354 ISER_ERR("failed to allocate reg_resources");
355 goto err;
356 }
357
358 return (desc);
359 err:
360 free(desc, M_ISER_VERBS);
361 return (NULL);
362 }
363
364 /**
365 * iser_create_fmr_pool - Creates FMR pool and page_vector
366 *
367 * returns 0 on success, or errno code on failure
368 */
369 int
370 iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
371 {
372 struct iser_device *device = ib_conn->device;
373 struct fast_reg_descriptor *desc;
374 int i;
375
376 INIT_LIST_HEAD(&ib_conn->fastreg.pool);
377 ib_conn->fastreg.pool_size = 0;
378 for (i = 0; i < cmds_max; i++) {
379 desc = iser_create_fastreg_desc(device->ib_device, device->pd);
380 if (!desc) {
381 ISER_ERR("Failed to create fastreg descriptor");
382 goto err;
383 }
384
385 list_add_tail(&desc->list, &ib_conn->fastreg.pool);
386 ib_conn->fastreg.pool_size++;
387 }
388
389 return (0);
390
391 err:
392 iser_free_fastreg_pool(ib_conn);
393 return (ENOMEM);
394 }
395
396 /**
397 * iser_free_fmr_pool - releases the FMR pool and page vec
398 */
399 void
400 iser_free_fastreg_pool(struct ib_conn *ib_conn)
401 {
402 struct fast_reg_descriptor *desc, *tmp;
403 int i = 0;
404
405 if (list_empty(&ib_conn->fastreg.pool))
406 return;
407
408 ISER_DBG("freeing conn %p fr pool", ib_conn);
409
410 list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
411 list_del(&desc->list);
412 iser_free_reg_res(&desc->rsc);
413 free(desc, M_ISER_VERBS);
414 ++i;
415 }
416
417 if (i < ib_conn->fastreg.pool_size)
418 ISER_WARN("pool still has %d regions registered",
419 ib_conn->fastreg.pool_size - i);
420 }
421
422 /**
423 * iser_create_ib_conn_res - Queue-Pair (QP)
424 *
425 * returns 0 on success, 1 on failure
426 */
427 static int
428 iser_create_ib_conn_res(struct ib_conn *ib_conn)
429 {
430 struct iser_conn *iser_conn;
431 struct iser_device *device;
432 struct ib_device_attr *dev_attr;
433 struct ib_qp_init_attr init_attr;
434 int index, min_index = 0;
435 int ret = -ENOMEM;
436
437 iser_conn = container_of(ib_conn, struct iser_conn, ib_conn);
438 device = ib_conn->device;
439 dev_attr = &device->dev_attr;
440
441 mtx_lock(&ig.connlist_mutex);
442 /* select the CQ with the minimal number of usages */
443 for (index = 0; index < device->comps_used; index++) {
444 if (device->comps[index].active_qps <
445 device->comps[min_index].active_qps)
446 min_index = index;
447 }
448 ib_conn->comp = &device->comps[min_index];
449 ib_conn->comp->active_qps++;
450 mtx_unlock(&ig.connlist_mutex);
451 ISER_INFO("cq index %d used for ib_conn %p", min_index, ib_conn);
452
453 memset(&init_attr, 0, sizeof init_attr);
454 init_attr.event_handler = iser_qp_event_callback;
455 init_attr.qp_context = (void *)ib_conn;
456 init_attr.send_cq = ib_conn->comp->cq;
457 init_attr.recv_cq = ib_conn->comp->cq;
458 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
459 init_attr.cap.max_send_sge = 2;
460 init_attr.cap.max_recv_sge = 1;
461 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
462 init_attr.qp_type = IB_QPT_RC;
463
464 if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
465 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
466 iser_conn->max_cmds =
467 ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
468 } else {
469 init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
470 iser_conn->max_cmds =
471 ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
472 }
473 ISER_DBG("device %s supports max_send_wr %d",
474 device->ib_device->name, dev_attr->max_qp_wr);
475
476 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
477 if (ret)
478 goto out_err;
479
480 ib_conn->qp = ib_conn->cma_id->qp;
481 ISER_DBG("setting conn %p cma_id %p qp %p",
482 ib_conn, ib_conn->cma_id,
483 ib_conn->cma_id->qp);
484
485 return (ret);
486
487 out_err:
488 mtx_lock(&ig.connlist_mutex);
489 ib_conn->comp->active_qps--;
490 mtx_unlock(&ig.connlist_mutex);
491 ISER_ERR("unable to alloc mem or create resource, err %d", ret);
492
493 return (ret);
494 }
495
496 /**
497 * based on the resolved device node GUID see if there already allocated
498 * device for this device. If there's no such, create one.
499 */
500 static struct iser_device *
501 iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
502 {
503 struct iser_device *device;
504
505 sx_xlock(&ig.device_list_mutex);
506
507 list_for_each_entry(device, &ig.device_list, ig_list)
508 /* find if there's a match using the node GUID */
509 if (device->ib_device->node_guid == cma_id->device->node_guid)
510 goto inc_refcnt;
511
512 device = malloc(sizeof *device, M_ISER_VERBS, M_WAITOK | M_ZERO);
513 if (device == NULL)
514 goto out;
515
516 /* assign this device to the device */
517 device->ib_device = cma_id->device;
518 /* init the device and link it into ig device list */
519 if (iser_create_device_ib_res(device)) {
520 free(device, M_ISER_VERBS);
521 device = NULL;
522 goto out;
523 }
524 list_add(&device->ig_list, &ig.device_list);
525
526 inc_refcnt:
527 device->refcount++;
528 ISER_INFO("device %p refcount %d", device, device->refcount);
529 out:
530 sx_xunlock(&ig.device_list_mutex);
531 return (device);
532 }
533
534 /* if there's no demand for this device, release it */
535 static void
536 iser_device_try_release(struct iser_device *device)
537 {
538 sx_xlock(&ig.device_list_mutex);
539 device->refcount--;
540 ISER_INFO("device %p refcount %d", device, device->refcount);
541 if (!device->refcount) {
542 iser_free_device_ib_res(device);
543 list_del(&device->ig_list);
544 free(device, M_ISER_VERBS);
545 device = NULL;
546 }
547 sx_xunlock(&ig.device_list_mutex);
548 }
549
550 /**
551 * Called with state mutex held
552 **/
553 static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
554 enum iser_conn_state comp,
555 enum iser_conn_state exch)
556 {
557 int ret;
558
559 ret = (iser_conn->state == comp);
560 if (ret)
561 iser_conn->state = exch;
562
563 return ret;
564 }
565
566 /**
567 * iser_free_ib_conn_res - release IB related resources
568 * @iser_conn: iser connection struct
569 * @destroy: indicator if we need to try to release the
570 * iser device and memory regoins pool (only iscsi
571 * shutdown and DEVICE_REMOVAL will use this).
572 *
573 * This routine is called with the iser state mutex held
574 * so the cm_id removal is out of here. It is Safe to
575 * be invoked multiple times.
576 */
577 void
578 iser_free_ib_conn_res(struct iser_conn *iser_conn,
579 bool destroy)
580 {
581 struct ib_conn *ib_conn = &iser_conn->ib_conn;
582 struct iser_device *device = ib_conn->device;
583
584 ISER_INFO("freeing conn %p cma_id %p qp %p",
585 iser_conn, ib_conn->cma_id, ib_conn->qp);
586
587 if (ib_conn->qp != NULL) {
588 mtx_lock(&ig.connlist_mutex);
589 ib_conn->comp->active_qps--;
590 mtx_unlock(&ig.connlist_mutex);
591 rdma_destroy_qp(ib_conn->cma_id);
592 ib_conn->qp = NULL;
593 }
594
595 if (destroy) {
596 if (iser_conn->login_buf)
597 iser_free_login_buf(iser_conn);
598
599 if (iser_conn->rx_descs)
600 iser_free_rx_descriptors(iser_conn);
601
602 if (device != NULL) {
603 iser_device_try_release(device);
604 ib_conn->device = NULL;
605 }
606 }
607 }
608
609 /**
610 * triggers start of the disconnect procedures and wait for them to be done
611 * Called with state mutex held
612 */
613 int
614 iser_conn_terminate(struct iser_conn *iser_conn)
615 {
616 struct ib_conn *ib_conn = &iser_conn->ib_conn;
617 const struct ib_send_wr *bad_send_wr;
618 const struct ib_recv_wr *bad_recv_wr;
619 int err = 0;
620
621 /* terminate the iser conn only if the conn state is UP */
622 if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
623 ISER_CONN_TERMINATING))
624 return (0);
625
626 ISER_INFO("iser_conn %p state %d\n", iser_conn, iser_conn->state);
627
628 if (ib_conn->qp == NULL) {
629 /* HOW can this be??? */
630 ISER_WARN("qp wasn't created");
631 return (1);
632 }
633
634 /*
635 * Todo: This is a temporary workaround.
636 * We serialize the connection closure using global lock in order to
637 * receive all posted beacons completions.
638 * Without Serialization, in case we open many connections (QPs) on
639 * the same CQ, we might miss beacons because of missing interrupts.
640 */
641 sx_xlock(&ig.close_conns_mutex);
642
643 /*
644 * In case we didn't already clean up the cma_id (peer initiated
645 * a disconnection), we need to Cause the CMA to change the QP
646 * state to ERROR.
647 */
648 if (ib_conn->cma_id) {
649 err = rdma_disconnect(ib_conn->cma_id);
650 if (err)
651 ISER_ERR("Failed to disconnect, conn: 0x%p err %d",
652 iser_conn, err);
653
654 mtx_lock(&ib_conn->beacon.flush_lock);
655 memset(&ib_conn->beacon.send, 0, sizeof(struct ib_send_wr));
656 ib_conn->beacon.send.wr_id = ISER_BEACON_WRID;
657 ib_conn->beacon.send.opcode = IB_WR_SEND;
658 /* post an indication that all send flush errors were consumed */
659 err = ib_post_send(ib_conn->qp, &ib_conn->beacon.send, &bad_send_wr);
660 if (err) {
661 ISER_ERR("conn %p failed to post send_beacon", ib_conn);
662 mtx_unlock(&ib_conn->beacon.flush_lock);
663 goto out;
664 }
665
666 ISER_DBG("before send cv_wait: %p", iser_conn);
667 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
668 ISER_DBG("after send cv_wait: %p", iser_conn);
669
670 memset(&ib_conn->beacon.recv, 0, sizeof(struct ib_recv_wr));
671 ib_conn->beacon.recv.wr_id = ISER_BEACON_WRID;
672 /* post an indication that all recv flush errors were consumed */
673 err = ib_post_recv(ib_conn->qp, &ib_conn->beacon.recv, &bad_recv_wr);
674 if (err) {
675 ISER_ERR("conn %p failed to post recv_beacon", ib_conn);
676 mtx_unlock(&ib_conn->beacon.flush_lock);
677 goto out;
678 }
679
680 ISER_DBG("before recv cv_wait: %p", iser_conn);
681 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
682 mtx_unlock(&ib_conn->beacon.flush_lock);
683 ISER_DBG("after recv cv_wait: %p", iser_conn);
684 }
685 out:
686 sx_xunlock(&ig.close_conns_mutex);
687 return (1);
688 }
689
690 /**
691 * Called with state mutex held
692 **/
693 static void
694 iser_connect_error(struct rdma_cm_id *cma_id)
695 {
696 struct iser_conn *iser_conn;
697
698 iser_conn = cma_id->context;
699
700 ISER_ERR("conn %p", iser_conn);
701
702 iser_conn->state = ISER_CONN_TERMINATING;
703
704 cv_signal(&iser_conn->up_cv);
705 }
706
707 /**
708 * Called with state mutex held
709 **/
710 static void
711 iser_addr_handler(struct rdma_cm_id *cma_id)
712 {
713 struct iser_device *device;
714 struct iser_conn *iser_conn;
715 struct ib_conn *ib_conn;
716 int ret;
717
718 iser_conn = cma_id->context;
719
720 ib_conn = &iser_conn->ib_conn;
721 device = iser_device_find_by_ib_device(cma_id);
722 if (!device) {
723 ISER_ERR("conn %p device lookup/creation failed",
724 iser_conn);
725 iser_connect_error(cma_id);
726 return;
727 }
728
729 ib_conn->device = device;
730
731 ret = rdma_resolve_route(cma_id, 1000);
732 if (ret) {
733 ISER_ERR("conn %p resolve route failed: %d", iser_conn, ret);
734 iser_connect_error(cma_id);
735 return;
736 }
737 }
738
739 /**
740 * Called with state mutex held
741 **/
742 static void
743 iser_route_handler(struct rdma_cm_id *cma_id)
744 {
745 struct rdma_conn_param conn_param;
746 int ret;
747 struct iser_cm_hdr req_hdr;
748 struct iser_conn *iser_conn = cma_id->context;
749 struct ib_conn *ib_conn = &iser_conn->ib_conn;
750 struct iser_device *device = ib_conn->device;
751
752 ret = iser_create_ib_conn_res(ib_conn);
753 if (ret)
754 goto failure;
755
756 memset(&conn_param, 0, sizeof conn_param);
757 conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
758 conn_param.retry_count = 7;
759 conn_param.rnr_retry_count = 6;
760 /*
761 * Initiaotr depth should not be set, but in order to compat
762 * with old targets, we keep this value set.
763 */
764 conn_param.initiator_depth = 1;
765
766 memset(&req_hdr, 0, sizeof(req_hdr));
767 req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
768 ISER_SEND_W_INV_NOT_SUPPORTED);
769 conn_param.private_data = (void *)&req_hdr;
770 conn_param.private_data_len = sizeof(struct iser_cm_hdr);
771
772 ret = rdma_connect(cma_id, &conn_param);
773 if (ret) {
774 ISER_ERR("conn %p failure connecting: %d", iser_conn, ret);
775 goto failure;
776 }
777
778 return;
779 failure:
780 iser_connect_error(cma_id);
781 }
782
783 /**
784 * Called with state mutex held
785 **/
786 static void
787 iser_connected_handler(struct rdma_cm_id *cma_id)
788 {
789 struct iser_conn *iser_conn;
790 struct ib_qp_attr attr;
791 struct ib_qp_init_attr init_attr;
792
793 iser_conn = cma_id->context;
794
795 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
796
797 ISER_INFO("remote qpn:%x my qpn:%x",
798 attr.dest_qp_num, cma_id->qp->qp_num);
799
800 iser_conn->state = ISER_CONN_UP;
801
802 cv_signal(&iser_conn->up_cv);
803 }
804
805 /**
806 * Called with state mutex held
807 **/
808 static void
809 iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy)
810 {
811 struct iser_conn *iser_conn = cma_id->context;
812
813 if (iser_conn_terminate(iser_conn))
814 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
815
816 }
817
818 int
819 iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
820 {
821 struct iser_conn *iser_conn;
822 int ret = 0;
823
824 iser_conn = cma_id->context;
825 ISER_INFO("event %d status %d conn %p id %p",
826 event->event, event->status, cma_id->context, cma_id);
827
828 sx_xlock(&iser_conn->state_mutex);
829 switch (event->event) {
830 case RDMA_CM_EVENT_ADDR_RESOLVED:
831 iser_addr_handler(cma_id);
832 break;
833 case RDMA_CM_EVENT_ROUTE_RESOLVED:
834 iser_route_handler(cma_id);
835 break;
836 case RDMA_CM_EVENT_ESTABLISHED:
837 iser_connected_handler(cma_id);
838 break;
839 case RDMA_CM_EVENT_ADDR_ERROR:
840 case RDMA_CM_EVENT_ROUTE_ERROR:
841 case RDMA_CM_EVENT_CONNECT_ERROR:
842 case RDMA_CM_EVENT_UNREACHABLE:
843 case RDMA_CM_EVENT_REJECTED:
844 iser_connect_error(cma_id);
845 break;
846 case RDMA_CM_EVENT_DISCONNECTED:
847 case RDMA_CM_EVENT_ADDR_CHANGE:
848 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
849 iser_cleanup_handler(cma_id, false);
850 break;
851 default:
852 ISER_ERR("Unexpected RDMA CM event (%d)", event->event);
853 break;
854 }
855 sx_xunlock(&iser_conn->state_mutex);
856
857 return (ret);
858 }
859
860 int
861 iser_post_recvl(struct iser_conn *iser_conn)
862 {
863 const struct ib_recv_wr *rx_wr_failed;
864 struct ib_recv_wr rx_wr;
865 struct ib_conn *ib_conn = &iser_conn->ib_conn;
866 struct ib_sge sge;
867 int ib_ret;
868
869 sge.addr = iser_conn->login_resp_dma;
870 sge.length = ISER_RX_LOGIN_SIZE;
871 sge.lkey = ib_conn->device->mr->lkey;
872
873 rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
874 rx_wr.sg_list = &sge;
875 rx_wr.num_sge = 1;
876 rx_wr.next = NULL;
877
878 ib_conn->post_recv_buf_count++;
879 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
880 if (ib_ret) {
881 ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
882 ib_conn->post_recv_buf_count--;
883 }
884
885 return (ib_ret);
886 }
887
888 int
889 iser_post_recvm(struct iser_conn *iser_conn, int count)
890 {
891 const struct ib_recv_wr *rx_wr_failed;
892 struct ib_recv_wr *rx_wr;
893 int i, ib_ret;
894 struct ib_conn *ib_conn = &iser_conn->ib_conn;
895 unsigned int my_rx_head = iser_conn->rx_desc_head;
896 struct iser_rx_desc *rx_desc;
897
898 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
899 rx_desc = &iser_conn->rx_descs[my_rx_head];
900 rx_wr->wr_id = (uintptr_t)rx_desc;
901 rx_wr->sg_list = &rx_desc->rx_sg;
902 rx_wr->num_sge = 1;
903 rx_wr->next = rx_wr + 1;
904 my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos;
905 }
906
907 rx_wr--;
908 rx_wr->next = NULL; /* mark end of work requests list */
909
910 ib_conn->post_recv_buf_count += count;
911 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
912 if (ib_ret) {
913 ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
914 ib_conn->post_recv_buf_count -= count;
915 } else
916 iser_conn->rx_desc_head = my_rx_head;
917
918 return (ib_ret);
919 }
920
921 /**
922 * iser_start_send - Initiate a Send DTO operation
923 *
924 * returns 0 on success, -1 on failure
925 */
926 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
927 bool signal)
928 {
929 int ib_ret;
930 const struct ib_send_wr *send_wr_failed;
931 struct ib_send_wr send_wr;
932
933 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
934 tx_desc->dma_addr, ISER_HEADERS_LEN,
935 DMA_TO_DEVICE);
936
937 send_wr.next = NULL;
938 send_wr.wr_id = (uintptr_t)tx_desc;
939 send_wr.sg_list = tx_desc->tx_sg;
940 send_wr.num_sge = tx_desc->num_sge;
941 send_wr.opcode = IB_WR_SEND;
942 send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
943
944 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
945 if (ib_ret)
946 ISER_ERR("ib_post_send failed, ret:%d", ib_ret);
947
948 return (ib_ret);
949 }
Cache object: 8ccc78dbbcdae270bd903b16d4305c07
|