1 /*-
2 * Copyright (c) 2012 Chelsio Communications, Inc.
3 * All rights reserved.
4 *
5 * Chelsio T5xx iSCSI driver
6 *
7 * Written by: Sreenivasa Honnur <shonnur@chelsio.com>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include "opt_inet.h"
35 #include "opt_inet6.h"
36
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/kernel.h>
40 #include <sys/ktr.h>
41 #include <sys/module.h>
42 #include <sys/systm.h>
43
44 #ifdef TCP_OFFLOAD
45 #include <sys/errno.h>
46 #include <sys/gsb_crc32.h>
47 #include <sys/kthread.h>
48 #include <sys/smp.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/mbuf.h>
52 #include <sys/lock.h>
53 #include <sys/mutex.h>
54 #include <sys/condvar.h>
55 #include <sys/uio.h>
56
57 #include <netinet/in.h>
58 #include <netinet/in_pcb.h>
59 #include <netinet/toecore.h>
60 #include <netinet/tcp_var.h>
61 #include <netinet/tcp_fsm.h>
62
63 #include <cam/scsi/scsi_all.h>
64 #include <cam/scsi/scsi_da.h>
65 #include <cam/ctl/ctl_io.h>
66 #include <cam/ctl/ctl.h>
67 #include <cam/ctl/ctl_backend.h>
68 #include <cam/ctl/ctl_error.h>
69 #include <cam/ctl/ctl_frontend.h>
70 #include <cam/ctl/ctl_debug.h>
71 #include <cam/ctl/ctl_ha.h>
72 #include <cam/ctl/ctl_ioctl.h>
73
74 #include <dev/iscsi/icl.h>
75 #include <dev/iscsi/iscsi_proto.h>
76 #include <dev/iscsi/iscsi_ioctl.h>
77 #include <dev/iscsi/iscsi.h>
78 #include <cam/ctl/ctl_frontend_iscsi.h>
79
80 #include <cam/cam.h>
81 #include <cam/cam_ccb.h>
82 #include <cam/cam_xpt.h>
83 #include <cam/cam_debug.h>
84 #include <cam/cam_sim.h>
85 #include <cam/cam_xpt_sim.h>
86 #include <cam/cam_xpt_periph.h>
87 #include <cam/cam_periph.h>
88 #include <cam/cam_compat.h>
89 #include <cam/scsi/scsi_message.h>
90
91 #include "common/common.h"
92 #include "common/t4_msg.h"
93 #include "common/t4_regs.h" /* for PCIE_MEM_ACCESS */
94 #include "tom/t4_tom.h"
95 #include "cxgbei.h"
96
97 static void
98 read_pdu_limits(struct adapter *sc, uint32_t *max_tx_data_len,
99 uint32_t *max_rx_data_len, struct ppod_region *pr)
100 {
101 uint32_t tx_len, rx_len, r, v;
102
103 rx_len = t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE);
104 tx_len = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
105
106 r = t4_read_reg(sc, A_TP_PARA_REG2);
107 rx_len = min(rx_len, G_MAXRXDATA(r));
108 tx_len = min(tx_len, G_MAXRXDATA(r));
109
110 r = t4_read_reg(sc, A_TP_PARA_REG7);
111 v = min(G_PMMAXXFERLEN0(r), G_PMMAXXFERLEN1(r));
112 rx_len = min(rx_len, v);
113 tx_len = min(tx_len, v);
114
115 /*
116 * AHS is not supported by the kernel so we'll not account for
117 * it either in our PDU len -> data segment len conversions.
118 */
119 rx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
120 ISCSI_DATA_DIGEST_SIZE;
121 tx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
122 ISCSI_DATA_DIGEST_SIZE;
123
124 /*
125 * DDP can place only 4 pages for a single PDU. A single
126 * request might use larger pages than the smallest page size,
127 * but that cannot be guaranteed. Assume the smallest DDP
128 * page size for this limit.
129 */
130 rx_len = min(rx_len, 4 * (1U << pr->pr_page_shift[0]));
131
132 if (chip_id(sc) == CHELSIO_T5) {
133 tx_len = min(tx_len, 15360);
134
135 rx_len = rounddown2(rx_len, 512);
136 tx_len = rounddown2(tx_len, 512);
137 }
138
139 *max_tx_data_len = tx_len;
140 *max_rx_data_len = rx_len;
141 }
142
143 /*
144 * Initialize the software state of the iSCSI ULP driver.
145 *
146 * ENXIO means firmware didn't set up something that it was supposed to.
147 */
148 static int
149 cxgbei_init(struct adapter *sc, struct cxgbei_data *ci)
150 {
151 struct sysctl_oid *oid;
152 struct sysctl_oid_list *children;
153 struct ppod_region *pr;
154 uint32_t r;
155 int rc;
156
157 MPASS(sc->vres.iscsi.size > 0);
158 MPASS(ci != NULL);
159
160 pr = &ci->pr;
161 r = t4_read_reg(sc, A_ULP_RX_ISCSI_PSZ);
162 rc = t4_init_ppod_region(pr, &sc->vres.iscsi, r, "iSCSI page pods");
163 if (rc != 0) {
164 device_printf(sc->dev,
165 "%s: failed to initialize the iSCSI page pod region: %u.\n",
166 __func__, rc);
167 return (rc);
168 }
169
170 read_pdu_limits(sc, &ci->max_tx_data_len, &ci->max_rx_data_len, pr);
171
172 sysctl_ctx_init(&ci->ctx);
173 oid = device_get_sysctl_tree(sc->dev); /* dev.t5nex.X */
174 children = SYSCTL_CHILDREN(oid);
175
176 oid = SYSCTL_ADD_NODE(&ci->ctx, children, OID_AUTO, "iscsi",
177 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "iSCSI ULP settings");
178 children = SYSCTL_CHILDREN(oid);
179
180 ci->ddp_threshold = 2048;
181 SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "ddp_threshold",
182 CTLFLAG_RW, &ci->ddp_threshold, 0, "Rx zero copy threshold");
183
184 SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_rx_data_len",
185 CTLFLAG_RW, &ci->max_rx_data_len, 0,
186 "Maximum receive data segment length");
187 SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_tx_data_len",
188 CTLFLAG_RW, &ci->max_tx_data_len, 0,
189 "Maximum transmit data segment length");
190
191 return (0);
192 }
193
194 static int
195 do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
196 {
197 struct adapter *sc = iq->adapter;
198 struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *);
199 u_int tid = GET_TID(cpl);
200 struct toepcb *toep = lookup_tid(sc, tid);
201 struct icl_pdu *ip;
202 struct icl_cxgbei_pdu *icp;
203 uint16_t len_ddp = be16toh(cpl->pdu_len_ddp);
204 uint16_t len = be16toh(cpl->len);
205
206 M_ASSERTPKTHDR(m);
207 MPASS(m->m_pkthdr.len == len + sizeof(*cpl));
208
209 ip = icl_cxgbei_new_pdu(M_NOWAIT);
210 if (ip == NULL)
211 CXGBE_UNIMPLEMENTED("PDU allocation failure");
212 m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
213 ip->ip_data_len = G_ISCSI_PDU_LEN(len_ddp) - len;
214 icp = ip_to_icp(ip);
215 icp->icp_seq = ntohl(cpl->seq);
216 icp->icp_flags = ICPF_RX_HDR;
217
218 /* This is the start of a new PDU. There should be no old state. */
219 MPASS(toep->ulpcb2 == NULL);
220 toep->ulpcb2 = icp;
221
222 #if 0
223 CTR5(KTR_CXGBE, "%s: tid %u, cpl->len %u, pdu_len_ddp 0x%04x, icp %p",
224 __func__, tid, len, len_ddp, icp);
225 #endif
226
227 m_freem(m);
228 return (0);
229 }
230
/*
 * Handle a CPL_ISCSI_DATA message: a PDU's data payload delivered via
 * the freelist (non-DDP) path.  On T5 the header arrived earlier via
 * CPL_ISCSI_HDR; on T6 (completion mode) the header arrives later in
 * CPL_RX_ISCSI_CMP.
 */
static int
do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct cpl_iscsi_data *cpl = mtod(m, struct cpl_iscsi_data *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == be16toh(cpl->len) + sizeof(*cpl));

	if (icp == NULL) {
		/*
		 * T6 completion enabled, start of a new pdu. Header
		 * will come in completion CPL.
		 */
		ip = icl_cxgbei_new_pdu(M_NOWAIT);
		if (ip == NULL)
			CXGBE_UNIMPLEMENTED("PDU allocation failure");
		icp = ip_to_icp(ip);
	} else {
		/* T5 mode, header is already received. */
		MPASS(icp->icp_flags == ICPF_RX_HDR);
		MPASS(icp->ip.ip_data_mbuf == NULL);
		MPASS(icp->ip.ip_data_len == m->m_pkthdr.len - sizeof(*cpl));
	}

	/* Trim the cpl header from mbuf. */
	m_adj(m, sizeof(*cpl));

	/* The remaining mbuf becomes the PDU's data payload. */
	icp->icp_flags |= ICPF_RX_FLBUF;
	icp->ip.ip_data_mbuf = m;
	toep->ofld_rxq->rx_iscsi_fl_pdus++;
	toep->ofld_rxq->rx_iscsi_fl_octets += m->m_pkthdr.len;

	/*
	 * For T6, save the icp for further processing in the
	 * completion handler.
	 */
	if (icp->icp_flags == ICPF_RX_FLBUF) {
		/* Only ICPF_RX_FLBUF set ⇒ this is the fresh T6 PDU. */
		MPASS(toep->ulpcb2 == NULL);
		toep->ulpcb2 = icp;
	}

#if 0
	CTR4(KTR_CXGBE, "%s: tid %u, cpl->len %u, icp %p", __func__, tid,
	    be16toh(cpl->len), icp);
#endif

	return (0);
}
284
285 static int
286 mbuf_crc32c_helper(void *arg, void *data, u_int len)
287 {
288 uint32_t *digestp = arg;
289
290 *digestp = calculate_crc32c(*digestp, data, len);
291 return (0);
292 }
293
294 static struct icl_pdu *
295 parse_pdu(struct socket *so, struct toepcb *toep, struct icl_cxgbei_conn *icc,
296 struct sockbuf *sb, u_int total_len)
297 {
298 struct uio uio;
299 struct iovec iov[2];
300 struct iscsi_bhs bhs;
301 struct mbuf *m;
302 struct icl_pdu *ip;
303 u_int ahs_len, data_len, header_len, pdu_len;
304 uint32_t calc_digest, wire_digest;
305 int error;
306
307 uio.uio_segflg = UIO_SYSSPACE;
308 uio.uio_rw = UIO_READ;
309 uio.uio_td = curthread;
310
311 header_len = sizeof(struct iscsi_bhs);
312 if (icc->ic.ic_header_crc32c)
313 header_len += ISCSI_HEADER_DIGEST_SIZE;
314
315 if (total_len < header_len) {
316 ICL_WARN("truncated pre-offload PDU with len %u", total_len);
317 return (NULL);
318 }
319
320 iov[0].iov_base = &bhs;
321 iov[0].iov_len = sizeof(bhs);
322 iov[1].iov_base = &wire_digest;
323 iov[1].iov_len = sizeof(wire_digest);
324 uio.uio_iov = iov;
325 uio.uio_iovcnt = 1;
326 uio.uio_offset = 0;
327 uio.uio_resid = header_len;
328 error = soreceive(so, NULL, &uio, NULL, NULL, NULL);
329 if (error != 0) {
330 ICL_WARN("failed to read BHS from pre-offload PDU: %d", error);
331 return (NULL);
332 }
333
334 ahs_len = bhs.bhs_total_ahs_len * 4;
335 data_len = bhs.bhs_data_segment_len[0] << 16 |
336 bhs.bhs_data_segment_len[1] << 8 |
337 bhs.bhs_data_segment_len[2];
338 pdu_len = header_len + ahs_len + roundup2(data_len, 4);
339 if (icc->ic.ic_data_crc32c && data_len != 0)
340 pdu_len += ISCSI_DATA_DIGEST_SIZE;
341
342 if (total_len < pdu_len) {
343 ICL_WARN("truncated pre-offload PDU len %u vs %u", total_len,
344 pdu_len);
345 return (NULL);
346 }
347
348 if (ahs_len != 0) {
349 ICL_WARN("received pre-offload PDU with AHS");
350 return (NULL);
351 }
352
353 if (icc->ic.ic_header_crc32c) {
354 calc_digest = calculate_crc32c(0xffffffff, (caddr_t)&bhs,
355 sizeof(bhs));
356 calc_digest ^= 0xffffffff;
357 if (calc_digest != wire_digest) {
358 ICL_WARN("received pre-offload PDU 0x%02x with "
359 "invalid header digest (0x%x vs 0x%x)",
360 bhs.bhs_opcode, wire_digest, calc_digest);
361 toep->ofld_rxq->rx_iscsi_header_digest_errors++;
362 return (NULL);
363 }
364 }
365
366 m = NULL;
367 if (data_len != 0) {
368 uio.uio_iov = NULL;
369 uio.uio_resid = roundup2(data_len, 4);
370 if (icc->ic.ic_data_crc32c)
371 uio.uio_resid += ISCSI_DATA_DIGEST_SIZE;
372
373 error = soreceive(so, NULL, &uio, &m, NULL, NULL);
374 if (error != 0) {
375 ICL_WARN("failed to read data payload from "
376 "pre-offload PDU: %d", error);
377 return (NULL);
378 }
379
380 if (icc->ic.ic_data_crc32c) {
381 m_copydata(m, roundup2(data_len, 4),
382 sizeof(wire_digest), (caddr_t)&wire_digest);
383
384 calc_digest = 0xffffffff;
385 m_apply(m, 0, roundup2(data_len, 4), mbuf_crc32c_helper,
386 &calc_digest);
387 calc_digest ^= 0xffffffff;
388 if (calc_digest != wire_digest) {
389 ICL_WARN("received pre-offload PDU 0x%02x "
390 "with invalid data digest (0x%x vs 0x%x)",
391 bhs.bhs_opcode, wire_digest, calc_digest);
392 toep->ofld_rxq->rx_iscsi_data_digest_errors++;
393 m_freem(m);
394 return (NULL);
395 }
396 }
397 }
398
399 ip = icl_cxgbei_new_pdu(M_WAITOK);
400 icl_cxgbei_new_pdu_set_conn(ip, &icc->ic);
401 *ip->ip_bhs = bhs;
402 ip->ip_data_len = data_len;
403 ip->ip_data_mbuf = m;
404 return (ip);
405 }
406
407 void
408 parse_pdus(struct icl_cxgbei_conn *icc, struct sockbuf *sb)
409 {
410 struct icl_conn *ic = &icc->ic;
411 struct socket *so = ic->ic_socket;
412 struct toepcb *toep = icc->toep;
413 struct icl_pdu *ip, *lastip;
414 u_int total_len;
415
416 SOCKBUF_LOCK_ASSERT(sb);
417
418 CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__, toep->tid,
419 sbused(sb));
420
421 lastip = NULL;
422 while (sbused(sb) != 0 && (sb->sb_state & SBS_CANTRCVMORE) == 0) {
423 total_len = sbused(sb);
424 SOCKBUF_UNLOCK(sb);
425
426 ip = parse_pdu(so, toep, icc, sb, total_len);
427
428 if (ip == NULL) {
429 ic->ic_error(ic);
430 SOCKBUF_LOCK(sb);
431 return;
432 }
433
434 if (lastip == NULL)
435 STAILQ_INSERT_HEAD(&icc->rcvd_pdus, ip, ip_next);
436 else
437 STAILQ_INSERT_AFTER(&icc->rcvd_pdus, lastip, ip,
438 ip_next);
439 lastip = ip;
440
441 SOCKBUF_LOCK(sb);
442 }
443 }
444
/*
 * Handle a CPL_RX_ISCSI_DDP message: the final status for a PDU whose
 * header arrived earlier (CPL_ISCSI_HDR) and whose data was either
 * placed directly via DDP or delivered via CPL_ISCSI_DATA.  On success
 * the completed PDU is queued on the connection's rcvd_pdus list.
 */
static int
do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	u_int pdu_len, val;
	struct epoch_tracker et;

	MPASS(m == NULL);

	/* Must already be assembling a PDU. */
	MPASS(icp != NULL);
	MPASS(icp->icp_flags & ICPF_RX_HDR);	/* Data is optional. */
	MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);

	pdu_len = be16toh(cpl->len);	/* includes everything. */
	val = be32toh(cpl->ddpvld);

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp_flags 0x%08x",
	    __func__, tid, pdu_len, val, icp->icp_flags);
#endif

	icp->icp_flags |= ICPF_RX_STATUS;
	ip = &icp->ip;
	/* Record (but don't yet act on) hardware-detected PDU errors. */
	if (val & F_DDP_PADDING_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid padding",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_padding_errors++;
	}
	if (val & F_DDP_HDRCRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid header digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_header_digest_errors++;
	}
	if (val & F_DDP_DATACRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid data digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_data_digest_errors++;
	}
	/* Payload was DDP'ed directly into the host buffer. */
	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		toep->ofld_rxq->rx_iscsi_ddp_pdus++;
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/* Connection already dropped; discard the PDU. */
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}

	/*
	 * T6+ does not report data PDUs received via DDP without F
	 * set. This can result in gaps in the TCP sequence space.
	 */
	tp = intotcpcb(inp);
	MPASS(chip_id(sc) >= CHELSIO_T6 || icp->icp_seq == tp->rcv_nxt);
	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	icc = toep->ulpcb;
	if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) {
		/* Connection is going away; reset it and drop the PDU. */
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}
	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ic = &icc->ic;
	if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
	    F_DDP_DATACRC_ERR)) != 0) {
		/* Hardware flagged a digest/padding error: fatal. */
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		ic->ic_error(ic);
		return (0);
	}

	icl_cxgbei_new_pdu_set_conn(ip, ic);

	/* Queue the completed PDU and wake the receive worker. */
	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if (!icc->rx_active) {
		icc->rx_active = true;
		wakeup(&icc->rx_active);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;

	return (0);
}
580
/*
 * Handle a CPL_RX_ISCSI_CMP message (T6+ completion mode): carries the
 * final BHS and completion status for a PDU, or for a whole burst of
 * data PDUs whose payloads were placed via DDP.  The burst case is
 * collapsed into one "large" PDU before being queued.
 */
static int
do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct epoch_tracker et;
	struct adapter *sc = iq->adapter;
	struct cpl_rx_iscsi_cmp *cpl = mtod(m, struct cpl_rx_iscsi_cmp *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	struct cxgbei_cmp *cmp;
	struct inpcb *inp = toep->inp;
#ifdef INVARIANTS
	uint16_t len = be16toh(cpl->len);
	u_int data_digest_len;
#endif
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct iscsi_bhs_data_out *bhsdo;
	u_int val = be32toh(cpl->ddpvld);
	u_int npdus, pdu_len;
	uint32_t prev_seg_len;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == len + sizeof(*cpl));

	if ((val & F_DDP_PDU) == 0) {
		/* Data came via CPL_ISCSI_DATA, so PDU state must exist. */
		MPASS(icp != NULL);
		MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);
		ip = &icp->ip;
	}

	if (icp == NULL) {
		/* T6 completion enabled, start of a new PDU. */
		ip = icl_cxgbei_new_pdu(M_NOWAIT);
		if (ip == NULL)
			CXGBE_UNIMPLEMENTED("PDU allocation failure");
		icp = ip_to_icp(ip);
	}
	pdu_len = G_ISCSI_PDU_LEN(be16toh(cpl->pdu_len_ddp));

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp %p",
	    __func__, tid, pdu_len, val, icp);
#endif

	/* Copy header */
	m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
	bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
	/* 24-bit big-endian data segment length from the BHS. */
	ip->ip_data_len = bhsdo->bhsdo_data_segment_len[0] << 16 |
	    bhsdo->bhsdo_data_segment_len[1] << 8 |
	    bhsdo->bhsdo_data_segment_len[2];
	icp->icp_seq = ntohl(cpl->seq);
	icp->icp_flags |= ICPF_RX_HDR;
	icp->icp_flags |= ICPF_RX_STATUS;

	/* Record hardware-detected PDU errors; acted on further below. */
	if (val & F_DDP_PADDING_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid padding",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_padding_errors++;
	}
	if (val & F_DDP_HDRCRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid header digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_header_digest_errors++;
	}
	if (val & F_DDP_DATACRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid data digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_data_digest_errors++;
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/* Connection already dropped; discard the PDU. */
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	/*
	 * If icc is NULL, the connection is being closed in
	 * icl_cxgbei_conn_close(), just drop this data.
	 */
	icc = toep->ulpcb;
	if (__predict_false(icc == NULL)) {
		CTR4(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes), icc %p",
		    __func__, tid, pdu_len, icc);

		/*
		 * Update rcv_nxt so the sequence number of the FIN
		 * doesn't appear wrong.
		 */
		tp->rcv_nxt = icp->icp_seq + pdu_len;
		tp->t_rcvtime = ticks;
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ic = &icc->ic;
	if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
	    F_DDP_DATACRC_ERR)) != 0) {
		/* Hardware flagged a digest/padding error: fatal. */
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		ic->ic_error(ic);
		return (0);
	}

#ifdef INVARIANTS
	data_digest_len = (icc->ulp_submode & ULP_CRC_DATA) ?
	    ISCSI_DATA_DIGEST_SIZE : 0;
	MPASS(roundup2(ip->ip_data_len, 4) == pdu_len - len - data_digest_len);
#endif

	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		/* Payload was placed directly via DDP. */
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;

		/* Look up the transfer this burst belongs to by its tag. */
		switch (ip->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) {
		case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_initiator_task_tag));
			break;
		case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_target_transfer_tag));
			break;
		default:
			__assert_unreachable();
		}
		MPASS(cmp != NULL);

		/*
		 * The difference between the end of the last burst
		 * and the offset of the last PDU in this burst is
		 * the additional data received via DDP.
		 */
		prev_seg_len = be32toh(bhsdo->bhsdo_buffer_offset) -
		    cmp->next_buffer_offset;

		if (prev_seg_len != 0) {
			uint32_t orig_datasn;

			/*
			 * Return a "large" PDU representing the burst
			 * of PDUs. Adjust the offset and length of
			 * this PDU to represent the entire burst.
			 */
			ip->ip_data_len += prev_seg_len;
			bhsdo->bhsdo_data_segment_len[2] = ip->ip_data_len;
			bhsdo->bhsdo_data_segment_len[1] = ip->ip_data_len >> 8;
			bhsdo->bhsdo_data_segment_len[0] = ip->ip_data_len >> 16;
			bhsdo->bhsdo_buffer_offset =
			    htobe32(cmp->next_buffer_offset);

			orig_datasn = htobe32(bhsdo->bhsdo_datasn);
			npdus = orig_datasn - cmp->last_datasn;
			bhsdo->bhsdo_datasn = htobe32(cmp->last_datasn + 1);
			cmp->last_datasn = orig_datasn;
			ip->ip_additional_pdus = npdus - 1;
		} else {
			/* Single PDU in this burst. */
			MPASS(htobe32(bhsdo->bhsdo_datasn) ==
			    cmp->last_datasn + 1);
			npdus = 1;
			cmp->last_datasn = htobe32(bhsdo->bhsdo_datasn);
		}

		cmp->next_buffer_offset += ip->ip_data_len;
		toep->ofld_rxq->rx_iscsi_ddp_pdus += npdus;
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	} else {
		/* Data arrived earlier via the freelist path. */
		MPASS(icp->icp_flags & (ICPF_RX_FLBUF));
		MPASS(ip->ip_data_len == ip->ip_data_mbuf->m_pkthdr.len);
	}

	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		/* Receiver is gone; reset the connection. */
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp != NULL)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	icl_cxgbei_new_pdu_set_conn(ip, ic);

	/* Enqueue the PDU to the received pdus queue. */
	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if (!icc->rx_active) {
		icc->rx_active = true;
		wakeup(&icc->rx_active);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;
	m_freem(m);

	return (0);
}
824
825 static int
826 cxgbei_activate(struct adapter *sc)
827 {
828 struct cxgbei_data *ci;
829 int rc;
830
831 ASSERT_SYNCHRONIZED_OP(sc);
832
833 if (uld_active(sc, ULD_ISCSI)) {
834 KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p",
835 __func__, sc));
836 return (0);
837 }
838
839 if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) {
840 device_printf(sc->dev,
841 "not iSCSI offload capable, or capability disabled.\n");
842 return (ENOSYS);
843 }
844
845 /* per-adapter softc for iSCSI */
846 ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_WAITOK);
847 if (ci == NULL)
848 return (ENOMEM);
849
850 rc = cxgbei_init(sc, ci);
851 if (rc != 0) {
852 free(ci, M_CXGBE);
853 return (rc);
854 }
855
856 sc->iscsi_ulp_softc = ci;
857
858 return (0);
859 }
860
861 static int
862 cxgbei_deactivate(struct adapter *sc)
863 {
864 struct cxgbei_data *ci = sc->iscsi_ulp_softc;
865
866 ASSERT_SYNCHRONIZED_OP(sc);
867
868 if (ci != NULL) {
869 sysctl_ctx_free(&ci->ctx);
870 t4_free_ppod_region(&ci->pr);
871 free(ci, M_CXGBE);
872 sc->iscsi_ulp_softc = NULL;
873 }
874
875 return (0);
876 }
877
878 static void
879 cxgbei_activate_all(struct adapter *sc, void *arg __unused)
880 {
881
882 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0)
883 return;
884
885 /* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. */
886 if (sc->offload_map && !uld_active(sc, ULD_ISCSI))
887 (void) t4_activate_uld(sc, ULD_ISCSI);
888
889 end_synchronized_op(sc, 0);
890 }
891
892 static void
893 cxgbei_deactivate_all(struct adapter *sc, void *arg __unused)
894 {
895
896 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0)
897 return;
898
899 if (uld_active(sc, ULD_ISCSI))
900 (void) t4_deactivate_uld(sc, ULD_ISCSI);
901
902 end_synchronized_op(sc, 0);
903 }
904
/* Registration descriptor for the cxgbe(4) upper-layer-driver framework. */
static struct uld_info cxgbei_uld_info = {
	.uld_id = ULD_ISCSI,
	.activate = cxgbei_activate,
	.deactivate = cxgbei_deactivate,
};
910
911 static int
912 cxgbei_mod_load(void)
913 {
914 int rc;
915
916 t4_register_cpl_handler(CPL_ISCSI_HDR, do_rx_iscsi_hdr);
917 t4_register_cpl_handler(CPL_ISCSI_DATA, do_rx_iscsi_data);
918 t4_register_cpl_handler(CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);
919 t4_register_cpl_handler(CPL_RX_ISCSI_CMP, do_rx_iscsi_cmp);
920
921 rc = t4_register_uld(&cxgbei_uld_info);
922 if (rc != 0)
923 return (rc);
924
925 t4_iterate(cxgbei_activate_all, NULL);
926
927 return (rc);
928 }
929
930 static int
931 cxgbei_mod_unload(void)
932 {
933
934 t4_iterate(cxgbei_deactivate_all, NULL);
935
936 if (t4_unregister_uld(&cxgbei_uld_info) == EBUSY)
937 return (EBUSY);
938
939 t4_register_cpl_handler(CPL_ISCSI_HDR, NULL);
940 t4_register_cpl_handler(CPL_ISCSI_DATA, NULL);
941 t4_register_cpl_handler(CPL_RX_ISCSI_DDP, NULL);
942 t4_register_cpl_handler(CPL_RX_ISCSI_CMP, NULL);
943
944 return (0);
945 }
946 #endif
947
948 static int
949 cxgbei_modevent(module_t mod, int cmd, void *arg)
950 {
951 int rc = 0;
952
953 #ifdef TCP_OFFLOAD
954 switch (cmd) {
955 case MOD_LOAD:
956 rc = cxgbei_mod_load();
957 if (rc == 0)
958 rc = icl_cxgbei_mod_load();
959 break;
960
961 case MOD_UNLOAD:
962 rc = icl_cxgbei_mod_unload();
963 if (rc == 0)
964 rc = cxgbei_mod_unload();
965 break;
966
967 default:
968 rc = EINVAL;
969 }
970 #else
971 printf("cxgbei: compiled without TCP_OFFLOAD support.\n");
972 rc = EOPNOTSUPP;
973 #endif
974
975 return (rc);
976 }
977
/* Module metadata: name, event handler, no extra argument. */
static moduledata_t cxgbei_mod = {
	"cxgbei",
	cxgbei_modevent,
	NULL,
};

/* Declare the module and its dependencies on the TOE/NIC/ICL stack. */
MODULE_VERSION(cxgbei, 1);
DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1);
MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1);
MODULE_DEPEND(cxgbei, icl, 1, 1, 1);