1 /* $FreeBSD$ */
2 /*-
3 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include "icl_iser.h"
28
29 SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
30 "iSER module");
31 int iser_debug = 0;
32 SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
33 &iser_debug, 0, "Enable iser debug messages");
34
35 static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
36 static uma_zone_t icl_pdu_zone;
37
38 static volatile u_int icl_iser_ncons;
39 struct iser_global ig;
40
41 static void iser_conn_release(struct icl_conn *ic);
42
43 static icl_conn_new_pdu_t iser_conn_new_pdu;
44 static icl_conn_pdu_free_t iser_conn_pdu_free;
45 static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
46 static icl_conn_pdu_append_bio_t iser_conn_pdu_append_bio;
47 static icl_conn_pdu_append_data_t iser_conn_pdu_append_data;
48 static icl_conn_pdu_queue_t iser_conn_pdu_queue;
49 static icl_conn_handoff_t iser_conn_handoff;
50 static icl_conn_free_t iser_conn_free;
51 static icl_conn_close_t iser_conn_close;
52 static icl_conn_connect_t iser_conn_connect;
53 static icl_conn_task_setup_t iser_conn_task_setup;
54 static icl_conn_task_done_t iser_conn_task_done;
55 static icl_conn_pdu_get_bio_t iser_conn_pdu_get_bio;
56 static icl_conn_pdu_get_data_t iser_conn_pdu_get_data;
57
58 static kobj_method_t icl_iser_methods[] = {
59 KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
60 KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
61 KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
62 KOBJMETHOD(icl_conn_pdu_append_bio, iser_conn_pdu_append_bio),
63 KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
64 KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
65 KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
66 KOBJMETHOD(icl_conn_free, iser_conn_free),
67 KOBJMETHOD(icl_conn_close, iser_conn_close),
68 KOBJMETHOD(icl_conn_connect, iser_conn_connect),
69 KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
70 KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
71 KOBJMETHOD(icl_conn_pdu_get_bio, iser_conn_pdu_get_bio),
72 KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
73 { 0, 0 }
74 };
75
76 DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
77
78 /**
79 * iser_initialize_headers() - Initialize task headers
80 * @pdu: iser pdu
81 * @iser_conn: iser connection
82 *
83 * Notes:
84 * This routine may race with iser teardown flow for scsi
85 * error handling TMFs. So for TMF we should acquire the
86 * state mutex to avoid dereferencing the IB device which
87 * may have already been terminated (racing teardown sequence).
88 */
89 int
90 iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
91 {
92 struct iser_tx_desc *tx_desc = &pdu->desc;
93 struct iser_device *device = iser_conn->ib_conn.device;
94 u64 dma_addr;
95 int ret = 0;
96
97 dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
98 ISER_HEADERS_LEN, DMA_TO_DEVICE);
99 if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
100 ret = -ENOMEM;
101 goto out;
102 }
103
104 tx_desc->mapped = true;
105 tx_desc->dma_addr = dma_addr;
106 tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
107 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
108 tx_desc->tx_sg[0].lkey = device->mr->lkey;
109
110 out:
111
112 return (ret);
113 }
114
115 int
116 iser_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request,
117 struct bio *bp, size_t offset, size_t len, int flags)
118 {
119 MPASS(!((request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) ==
120 ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
121 (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) ==
122 ISCSI_BHS_OPCODE_TEXT_REQUEST));
123
124 return (0);
125 }
126
127 int
128 iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
129 const void *addr, size_t len, int flags)
130 {
131 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
132
133 switch (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) {
134 case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
135 case ISCSI_BHS_OPCODE_TEXT_REQUEST:
136 ISER_DBG("copy to login buff");
137 memcpy(iser_conn->login_req_buf, addr, len);
138 request->ip_data_len = len;
139 break;
140 }
141
142 return (0);
143 }
144
145 void
146 iser_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip,
147 size_t pdu_off, struct bio *bp, size_t bio_off,
148 size_t len)
149 {
150 MPASS(ip->ip_data_mbuf == NULL);
151 }
152
153 void
154 iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
155 size_t off, void *addr, size_t len)
156 {
157 /* If we have a receive data, copy it to upper layer buffer */
158 if (ip->ip_data_mbuf)
159 memcpy(addr, ip->ip_data_mbuf + off, len);
160 }
161
162 /*
163 * Allocate icl_pdu with empty BHS to fill up by the caller.
164 */
165 struct icl_pdu *
166 iser_new_pdu(struct icl_conn *ic, int flags)
167 {
168 struct icl_iser_pdu *iser_pdu;
169 struct icl_pdu *ip;
170 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
171
172 iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
173 if (iser_pdu == NULL) {
174 ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu));
175 return (NULL);
176 }
177
178 iser_pdu->iser_conn = iser_conn;
179 ip = &iser_pdu->icl_pdu;
180 ip->ip_conn = ic;
181 ip->ip_bhs = &iser_pdu->desc.iscsi_header;
182
183 return (ip);
184 }
185
/* ICL "new pdu" method: thin wrapper around iser_new_pdu(). */
struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_pdu *ip;

	ip = iser_new_pdu(ic, flags);
	return (ip);
}
191
192 void
193 iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
194 {
195 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
196
197 uma_zfree(icl_pdu_zone, iser_pdu);
198 }
199
200 size_t
201 iser_conn_pdu_data_segment_length(struct icl_conn *ic,
202 const struct icl_pdu *request)
203 {
204 uint32_t len = 0;
205
206 len += request->ip_bhs->bhs_data_segment_len[0];
207 len <<= 8;
208 len += request->ip_bhs->bhs_data_segment_len[1];
209 len <<= 8;
210 len += request->ip_bhs->bhs_data_segment_len[2];
211
212 return (len);
213 }
214
/* ICL "pdu free" method: thin wrapper around iser_pdu_free(). */
void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{

	iser_pdu_free(ic, ip);
}
220
221 static bool
222 is_control_opcode(uint8_t opcode)
223 {
224 bool is_control = false;
225
226 switch (opcode & ISCSI_OPCODE_MASK) {
227 case ISCSI_BHS_OPCODE_NOP_OUT:
228 case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
229 case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
230 case ISCSI_BHS_OPCODE_TEXT_REQUEST:
231 is_control = true;
232 break;
233 case ISCSI_BHS_OPCODE_SCSI_COMMAND:
234 is_control = false;
235 break;
236 default:
237 ISER_ERR("unknown opcode %d", opcode);
238 }
239
240 return (is_control);
241 }
242
243 void
244 iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
245 {
246 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
247 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
248 int ret;
249
250 if (iser_conn->state != ISER_CONN_UP)
251 return;
252
253 ret = iser_initialize_headers(iser_pdu, iser_conn);
254 if (ret) {
255 ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
256 return;
257 }
258
259 if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
260 ret = iser_send_control(iser_conn, iser_pdu);
261 if (unlikely(ret))
262 ISER_ERR("Failed to send control pdu %p", iser_pdu);
263 } else {
264 ret = iser_send_command(iser_conn, iser_pdu);
265 if (unlikely(ret))
266 ISER_ERR("Failed to send command pdu %p", iser_pdu);
267 }
268 }
269
270 static struct icl_conn *
271 iser_new_conn(const char *name, struct mtx *lock)
272 {
273 struct iser_conn *iser_conn;
274 struct icl_conn *ic;
275
276 refcount_acquire(&icl_iser_ncons);
277
278 iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER, M_WAITOK | M_ZERO);
279 if (!iser_conn) {
280 ISER_ERR("failed to allocate iser conn");
281 refcount_release(&icl_iser_ncons);
282 return (NULL);
283 }
284
285 cv_init(&iser_conn->up_cv, "iser_cv");
286 sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
287 mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock", NULL, MTX_DEF);
288 cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
289 mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);
290
291 ic = &iser_conn->icl_conn;
292 ic->ic_lock = lock;
293 ic->ic_name = name;
294 ic->ic_offload = strdup("iser", M_TEMP);
295 ic->ic_iser = true;
296 ic->ic_unmapped = true;
297
298 return (ic);
299 }
300
301 void
302 iser_conn_free(struct icl_conn *ic)
303 {
304 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
305
306 iser_conn_release(ic);
307 mtx_destroy(&iser_conn->ib_conn.lock);
308 cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
309 mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
310 sx_destroy(&iser_conn->state_mutex);
311 cv_destroy(&iser_conn->up_cv);
312 kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
313 refcount_release(&icl_iser_ncons);
314 }
315
316 int
317 iser_conn_handoff(struct icl_conn *ic, int fd)
318 {
319 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
320 int error = 0;
321
322 sx_xlock(&iser_conn->state_mutex);
323 if (iser_conn->state != ISER_CONN_UP) {
324 error = EINVAL;
325 ISER_ERR("iser_conn %p state is %d, teardown started\n",
326 iser_conn, iser_conn->state);
327 goto out;
328 }
329
330 error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
331 if (error)
332 goto out;
333
334 error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
335 if (error)
336 goto post_error;
337
338 iser_conn->handoff_done = true;
339
340 sx_xunlock(&iser_conn->state_mutex);
341 return (error);
342
343 post_error:
344 iser_free_rx_descriptors(iser_conn);
345 out:
346 sx_xunlock(&iser_conn->state_mutex);
347 return (error);
348
349 }
350
351 /**
352 * Frees all conn objects
353 */
354 static void
355 iser_conn_release(struct icl_conn *ic)
356 {
357 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
358 struct ib_conn *ib_conn = &iser_conn->ib_conn;
359 struct iser_conn *curr, *tmp;
360
361 mtx_lock(&ig.connlist_mutex);
362 /*
363 * Search for iser connection in global list.
364 * It may not be there in case of failure in connection establishment
365 * stage.
366 */
367 list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
368 if (iser_conn == curr) {
369 ISER_WARN("found iser_conn %p", iser_conn);
370 list_del(&iser_conn->conn_list);
371 }
372 }
373 mtx_unlock(&ig.connlist_mutex);
374
375 /*
376 * In case we reconnecting or removing session, we need to
377 * release IB resources (which is safe to call more than once).
378 */
379 sx_xlock(&iser_conn->state_mutex);
380 iser_free_ib_conn_res(iser_conn, true);
381 sx_xunlock(&iser_conn->state_mutex);
382
383 if (ib_conn->cma_id != NULL) {
384 rdma_destroy_id(ib_conn->cma_id);
385 ib_conn->cma_id = NULL;
386 }
387
388 }
389
390 void
391 iser_conn_close(struct icl_conn *ic)
392 {
393 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
394
395 ISER_INFO("closing conn %p", iser_conn);
396
397 sx_xlock(&iser_conn->state_mutex);
398 /*
399 * In case iser connection is waiting on conditional variable
400 * (state PENDING) and we try to close it before connection establishment,
401 * we need to signal it to continue releasing connection properly.
402 */
403 if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING)
404 cv_signal(&iser_conn->up_cv);
405 sx_xunlock(&iser_conn->state_mutex);
406
407 }
408
409 int
410 iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
411 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
412 {
413 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
414 struct ib_conn *ib_conn = &iser_conn->ib_conn;
415 int err = 0;
416
417 iser_conn_release(ic);
418
419 sx_xlock(&iser_conn->state_mutex);
420 /* the device is known only --after-- address resolution */
421 ib_conn->device = NULL;
422 iser_conn->handoff_done = false;
423
424 iser_conn->state = ISER_CONN_PENDING;
425
426 ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn,
427 RDMA_PS_TCP, IB_QPT_RC);
428 if (IS_ERR(ib_conn->cma_id)) {
429 err = -PTR_ERR(ib_conn->cma_id);
430 ISER_ERR("rdma_create_id failed: %d", err);
431 goto id_failure;
432 }
433
434 err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
435 if (err) {
436 ISER_ERR("rdma_resolve_addr failed: %d", err);
437 if (err < 0)
438 err = -err;
439 goto addr_failure;
440 }
441
442 ISER_DBG("before cv_wait: %p", iser_conn);
443 cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
444 ISER_DBG("after cv_wait: %p", iser_conn);
445
446 if (iser_conn->state != ISER_CONN_UP) {
447 err = EIO;
448 goto addr_failure;
449 }
450
451 err = iser_alloc_login_buf(iser_conn);
452 if (err)
453 goto addr_failure;
454 sx_xunlock(&iser_conn->state_mutex);
455
456 mtx_lock(&ig.connlist_mutex);
457 list_add(&iser_conn->conn_list, &ig.connlist);
458 mtx_unlock(&ig.connlist_mutex);
459
460 return (0);
461
462 id_failure:
463 ib_conn->cma_id = NULL;
464 addr_failure:
465 sx_xunlock(&iser_conn->state_mutex);
466 return (err);
467 }
468
469 int
470 iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
471 struct ccb_scsiio *csio,
472 uint32_t *task_tagp, void **prvp)
473 {
474 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
475
476 *prvp = ip;
477 iser_pdu->csio = csio;
478
479 return (0);
480 }
481
482 void
483 iser_conn_task_done(struct icl_conn *ic, void *prv)
484 {
485 struct icl_pdu *ip = prv;
486 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
487 struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
488 struct iser_tx_desc *tx_desc = &iser_pdu->desc;
489
490 if (iser_pdu->dir[ISER_DIR_IN]) {
491 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
492 iser_dma_unmap_task_data(iser_pdu,
493 &iser_pdu->data[ISER_DIR_IN],
494 DMA_FROM_DEVICE);
495 }
496
497 if (iser_pdu->dir[ISER_DIR_OUT]) {
498 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
499 iser_dma_unmap_task_data(iser_pdu,
500 &iser_pdu->data[ISER_DIR_OUT],
501 DMA_TO_DEVICE);
502 }
503
504 if (likely(tx_desc->mapped)) {
505 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
506 ISER_HEADERS_LEN, DMA_TO_DEVICE);
507 tx_desc->mapped = false;
508 }
509
510 iser_pdu_free(ic, ip);
511 }
512
513 static int
514 iser_limits(struct icl_drv_limits *idl, int socket)
515 {
516
517 idl->idl_max_recv_data_segment_length = 128 * 1024;
518 idl->idl_max_send_data_segment_length = 128 * 1024;
519 idl->idl_max_burst_length = 262144;
520 idl->idl_first_burst_length = 65536;
521
522 return (0);
523 }
524
525 static int
526 icl_iser_load(void)
527 {
528 int error;
529
530 ISER_DBG("Starting iSER datamover...");
531
532 icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
533 NULL, NULL, NULL, NULL,
534 UMA_ALIGN_PTR, 0);
535 /* FIXME: Check rc */
536
537 refcount_init(&icl_iser_ncons, 0);
538
539 error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
540 KASSERT(error == 0, ("failed to register iser"));
541
542 memset(&ig, 0, sizeof(struct iser_global));
543
544 /* device init is called only after the first addr resolution */
545 sx_init(&ig.device_list_mutex, "global_device_lock");
546 INIT_LIST_HEAD(&ig.device_list);
547 mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
548 INIT_LIST_HEAD(&ig.connlist);
549 sx_init(&ig.close_conns_mutex, "global_close_conns_lock");
550
551 return (error);
552 }
553
554 static int
555 icl_iser_unload(void)
556 {
557 ISER_DBG("Removing iSER datamover...");
558
559 if (icl_iser_ncons != 0)
560 return (EBUSY);
561
562 sx_destroy(&ig.close_conns_mutex);
563 mtx_destroy(&ig.connlist_mutex);
564 sx_destroy(&ig.device_list_mutex);
565
566 icl_unregister("iser", true);
567
568 uma_zdestroy(icl_pdu_zone);
569
570 return (0);
571 }
572
573 static int
574 icl_iser_modevent(module_t mod, int what, void *arg)
575 {
576 switch (what) {
577 case MOD_LOAD:
578 return (icl_iser_load());
579 case MOD_UNLOAD:
580 return (icl_iser_unload());
581 default:
582 return (EINVAL);
583 }
584 }
585
586 moduledata_t icl_iser_data = {
587 .name = "icl_iser",
588 .evhand = icl_iser_modevent,
589 .priv = 0
590 };
591
592 DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
593 MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
594 MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
595 MODULE_VERSION(icl_iser, 1);
Cache object: 965ecb634a66e003e37a597601c4efc7
|