The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgbe/cxgbei/cxgbei.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2012 Chelsio Communications, Inc.
    3  * All rights reserved.
    4  *
    5  * Chelsio T5xx iSCSI driver
    6  *
    7  * Written by: Sreenivasa Honnur <shonnur@chelsio.com>
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include "opt_inet.h"
   35 #include "opt_inet6.h"
   36 
   37 #include <sys/types.h>
   38 #include <sys/param.h>
   39 #include <sys/kernel.h>
   40 #include <sys/ktr.h>
   41 #include <sys/module.h>
   42 #include <sys/systm.h>
   43 
   44 #ifdef TCP_OFFLOAD
   45 #include <sys/errno.h>
   46 #include <sys/gsb_crc32.h>
   47 #include <sys/kthread.h>
   48 #include <sys/smp.h>
   49 #include <sys/socket.h>
   50 #include <sys/socketvar.h>
   51 #include <sys/mbuf.h>
   52 #include <sys/lock.h>
   53 #include <sys/mutex.h>
   54 #include <sys/condvar.h>
   55 #include <sys/uio.h>
   56 
   57 #include <netinet/in.h>
   58 #include <netinet/in_pcb.h>
   59 #include <netinet/toecore.h>
   60 #include <netinet/tcp_var.h>
   61 #include <netinet/tcp_fsm.h>
   62 
   63 #include <cam/scsi/scsi_all.h>
   64 #include <cam/scsi/scsi_da.h>
   65 #include <cam/ctl/ctl_io.h>
   66 #include <cam/ctl/ctl.h>
   67 #include <cam/ctl/ctl_backend.h>
   68 #include <cam/ctl/ctl_error.h>
   69 #include <cam/ctl/ctl_frontend.h>
   70 #include <cam/ctl/ctl_debug.h>
   71 #include <cam/ctl/ctl_ha.h>
   72 #include <cam/ctl/ctl_ioctl.h>
   73 
   74 #include <dev/iscsi/icl.h>
   75 #include <dev/iscsi/iscsi_proto.h>
   76 #include <dev/iscsi/iscsi_ioctl.h>
   77 #include <dev/iscsi/iscsi.h>
   78 #include <cam/ctl/ctl_frontend_iscsi.h>
   79 
   80 #include <cam/cam.h>
   81 #include <cam/cam_ccb.h>
   82 #include <cam/cam_xpt.h>
   83 #include <cam/cam_debug.h>
   84 #include <cam/cam_sim.h>
   85 #include <cam/cam_xpt_sim.h>
   86 #include <cam/cam_xpt_periph.h>
   87 #include <cam/cam_periph.h>
   88 #include <cam/cam_compat.h>
   89 #include <cam/scsi/scsi_message.h>
   90 
   91 #include "common/common.h"
   92 #include "common/t4_msg.h"
   93 #include "common/t4_regs.h"     /* for PCIE_MEM_ACCESS */
   94 #include "tom/t4_tom.h"
   95 #include "cxgbei.h"
   96 
   97 static void
   98 read_pdu_limits(struct adapter *sc, uint32_t *max_tx_data_len,
   99     uint32_t *max_rx_data_len, struct ppod_region *pr)
  100 {
  101         uint32_t tx_len, rx_len, r, v;
  102 
  103         rx_len = t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE);
  104         tx_len = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
  105 
  106         r = t4_read_reg(sc, A_TP_PARA_REG2);
  107         rx_len = min(rx_len, G_MAXRXDATA(r));
  108         tx_len = min(tx_len, G_MAXRXDATA(r));
  109 
  110         r = t4_read_reg(sc, A_TP_PARA_REG7);
  111         v = min(G_PMMAXXFERLEN0(r), G_PMMAXXFERLEN1(r));
  112         rx_len = min(rx_len, v);
  113         tx_len = min(tx_len, v);
  114 
  115         /*
  116          * AHS is not supported by the kernel so we'll not account for
  117          * it either in our PDU len -> data segment len conversions.
  118          */
  119         rx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
  120             ISCSI_DATA_DIGEST_SIZE;
  121         tx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
  122             ISCSI_DATA_DIGEST_SIZE;
  123 
  124         /*
  125          * DDP can place only 4 pages for a single PDU.  A single
  126          * request might use larger pages than the smallest page size,
  127          * but that cannot be guaranteed.  Assume the smallest DDP
  128          * page size for this limit.
  129          */
  130         rx_len = min(rx_len, 4 * (1U << pr->pr_page_shift[0]));
  131 
  132         if (chip_id(sc) == CHELSIO_T5) {
  133                 tx_len = min(tx_len, 15360);
  134 
  135                 rx_len = rounddown2(rx_len, 512);
  136                 tx_len = rounddown2(tx_len, 512);
  137         }
  138 
  139         *max_tx_data_len = tx_len;
  140         *max_rx_data_len = rx_len;
  141 }
  142 
  143 /*
  144  * Initialize the software state of the iSCSI ULP driver.
  145  *
  146  * ENXIO means firmware didn't set up something that it was supposed to.
  147  */
  148 static int
  149 cxgbei_init(struct adapter *sc, struct cxgbei_data *ci)
  150 {
  151         struct sysctl_oid *oid;
  152         struct sysctl_oid_list *children;
  153         struct ppod_region *pr;
  154         uint32_t r;
  155         int rc;
  156 
  157         MPASS(sc->vres.iscsi.size > 0);
  158         MPASS(ci != NULL);
  159 
  160         pr = &ci->pr;
  161         r = t4_read_reg(sc, A_ULP_RX_ISCSI_PSZ);
  162         rc = t4_init_ppod_region(pr, &sc->vres.iscsi, r, "iSCSI page pods");
  163         if (rc != 0) {
  164                 device_printf(sc->dev,
  165                     "%s: failed to initialize the iSCSI page pod region: %u.\n",
  166                     __func__, rc);
  167                 return (rc);
  168         }
  169 
  170         read_pdu_limits(sc, &ci->max_tx_data_len, &ci->max_rx_data_len, pr);
  171 
  172         sysctl_ctx_init(&ci->ctx);
  173         oid = device_get_sysctl_tree(sc->dev);  /* dev.t5nex.X */
  174         children = SYSCTL_CHILDREN(oid);
  175 
  176         oid = SYSCTL_ADD_NODE(&ci->ctx, children, OID_AUTO, "iscsi",
  177             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "iSCSI ULP settings");
  178         children = SYSCTL_CHILDREN(oid);
  179 
  180         ci->ddp_threshold = 2048;
  181         SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "ddp_threshold",
  182             CTLFLAG_RW, &ci->ddp_threshold, 0, "Rx zero copy threshold");
  183 
  184         SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_rx_data_len",
  185             CTLFLAG_RW, &ci->max_rx_data_len, 0,
  186             "Maximum receive data segment length");
  187         SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_tx_data_len",
  188             CTLFLAG_RW, &ci->max_tx_data_len, 0,
  189             "Maximum transmit data segment length");
  190 
  191         return (0);
  192 }
  193 
  194 static int
  195 do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
  196 {
  197         struct adapter *sc = iq->adapter;
  198         struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *);
  199         u_int tid = GET_TID(cpl);
  200         struct toepcb *toep = lookup_tid(sc, tid);
  201         struct icl_pdu *ip;
  202         struct icl_cxgbei_pdu *icp;
  203         uint16_t len_ddp = be16toh(cpl->pdu_len_ddp);
  204         uint16_t len = be16toh(cpl->len);
  205 
  206         M_ASSERTPKTHDR(m);
  207         MPASS(m->m_pkthdr.len == len + sizeof(*cpl));
  208 
  209         ip = icl_cxgbei_new_pdu(M_NOWAIT);
  210         if (ip == NULL)
  211                 CXGBE_UNIMPLEMENTED("PDU allocation failure");
  212         m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
  213         ip->ip_data_len = G_ISCSI_PDU_LEN(len_ddp) - len;
  214         icp = ip_to_icp(ip);
  215         icp->icp_seq = ntohl(cpl->seq);
  216         icp->icp_flags = ICPF_RX_HDR;
  217 
  218         /* This is the start of a new PDU.  There should be no old state. */
  219         MPASS(toep->ulpcb2 == NULL);
  220         toep->ulpcb2 = icp;
  221 
  222 #if 0
  223         CTR5(KTR_CXGBE, "%s: tid %u, cpl->len %u, pdu_len_ddp 0x%04x, icp %p",
  224             __func__, tid, len, len_ddp, icp);
  225 #endif
  226 
  227         m_freem(m);
  228         return (0);
  229 }
  230 
  231 static int
  232 do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
  233 {
  234         struct adapter *sc = iq->adapter;
  235         struct cpl_iscsi_data *cpl =  mtod(m, struct cpl_iscsi_data *);
  236         u_int tid = GET_TID(cpl);
  237         struct toepcb *toep = lookup_tid(sc, tid);
  238         struct icl_cxgbei_pdu *icp = toep->ulpcb2;
  239         struct icl_pdu *ip;
  240 
  241         M_ASSERTPKTHDR(m);
  242         MPASS(m->m_pkthdr.len == be16toh(cpl->len) + sizeof(*cpl));
  243 
  244         if (icp == NULL) {
  245                 /*
  246                  * T6 completion enabled, start of a new pdu. Header
  247                  * will come in completion CPL.
  248                  */
  249                 ip = icl_cxgbei_new_pdu(M_NOWAIT);
  250                 if (ip == NULL)
  251                         CXGBE_UNIMPLEMENTED("PDU allocation failure");
  252                 icp = ip_to_icp(ip);
  253         } else {
  254                 /* T5 mode, header is already received. */
  255                 MPASS(icp->icp_flags == ICPF_RX_HDR);
  256                 MPASS(icp->ip.ip_data_mbuf == NULL);
  257                 MPASS(icp->ip.ip_data_len == m->m_pkthdr.len - sizeof(*cpl));
  258         }
  259 
  260         /* Trim the cpl header from mbuf. */
  261         m_adj(m, sizeof(*cpl));
  262 
  263         icp->icp_flags |= ICPF_RX_FLBUF;
  264         icp->ip.ip_data_mbuf = m;
  265         toep->ofld_rxq->rx_iscsi_fl_pdus++;
  266         toep->ofld_rxq->rx_iscsi_fl_octets += m->m_pkthdr.len;
  267 
  268         /*
  269          * For T6, save the icp for further processing in the
  270          * completion handler.
  271          */
  272         if (icp->icp_flags == ICPF_RX_FLBUF) {
  273                 MPASS(toep->ulpcb2 == NULL);
  274                 toep->ulpcb2 = icp;
  275         }
  276 
  277 #if 0
  278         CTR4(KTR_CXGBE, "%s: tid %u, cpl->len %u, icp %p", __func__, tid,
  279             be16toh(cpl->len), icp);
  280 #endif
  281 
  282         return (0);
  283 }
  284 
  285 static int
  286 mbuf_crc32c_helper(void *arg, void *data, u_int len)
  287 {
  288         uint32_t *digestp = arg;
  289 
  290         *digestp = calculate_crc32c(*digestp, data, len);
  291         return (0);
  292 }
  293 
  294 static struct icl_pdu *
  295 parse_pdu(struct socket *so, struct toepcb *toep, struct icl_cxgbei_conn *icc,
  296     struct sockbuf *sb, u_int total_len)
  297 {
  298         struct uio uio;
  299         struct iovec iov[2];
  300         struct iscsi_bhs bhs;
  301         struct mbuf *m;
  302         struct icl_pdu *ip;
  303         u_int ahs_len, data_len, header_len, pdu_len;
  304         uint32_t calc_digest, wire_digest;
  305         int error;
  306 
  307         uio.uio_segflg = UIO_SYSSPACE;
  308         uio.uio_rw = UIO_READ;
  309         uio.uio_td = curthread;
  310 
  311         header_len = sizeof(struct iscsi_bhs);
  312         if (icc->ic.ic_header_crc32c)
  313                 header_len += ISCSI_HEADER_DIGEST_SIZE;
  314 
  315         if (total_len < header_len) {
  316                 ICL_WARN("truncated pre-offload PDU with len %u", total_len);
  317                 return (NULL);
  318         }
  319 
  320         iov[0].iov_base = &bhs;
  321         iov[0].iov_len = sizeof(bhs);
  322         iov[1].iov_base = &wire_digest;
  323         iov[1].iov_len = sizeof(wire_digest);
  324         uio.uio_iov = iov;
  325         uio.uio_iovcnt = 1;
  326         uio.uio_offset = 0;
  327         uio.uio_resid = header_len;
  328         error = soreceive(so, NULL, &uio, NULL, NULL, NULL);
  329         if (error != 0) {
  330                 ICL_WARN("failed to read BHS from pre-offload PDU: %d", error);
  331                 return (NULL);
  332         }
  333 
  334         ahs_len = bhs.bhs_total_ahs_len * 4;
  335         data_len = bhs.bhs_data_segment_len[0] << 16 |
  336             bhs.bhs_data_segment_len[1] << 8 |
  337             bhs.bhs_data_segment_len[2];
  338         pdu_len = header_len + ahs_len + roundup2(data_len, 4);
  339         if (icc->ic.ic_data_crc32c && data_len != 0)
  340                 pdu_len += ISCSI_DATA_DIGEST_SIZE;
  341 
  342         if (total_len < pdu_len) {
  343                 ICL_WARN("truncated pre-offload PDU len %u vs %u", total_len,
  344                     pdu_len);
  345                 return (NULL);
  346         }
  347 
  348         if (ahs_len != 0) {
  349                 ICL_WARN("received pre-offload PDU with AHS");
  350                 return (NULL);
  351         }
  352 
  353         if (icc->ic.ic_header_crc32c) {
  354                 calc_digest = calculate_crc32c(0xffffffff, (caddr_t)&bhs,
  355                     sizeof(bhs));
  356                 calc_digest ^= 0xffffffff;
  357                 if (calc_digest != wire_digest) {
  358                         ICL_WARN("received pre-offload PDU 0x%02x with "
  359                             "invalid header digest (0x%x vs 0x%x)",
  360                             bhs.bhs_opcode, wire_digest, calc_digest);
  361                         toep->ofld_rxq->rx_iscsi_header_digest_errors++;
  362                         return (NULL);
  363                 }
  364         }
  365 
  366         m = NULL;
  367         if (data_len != 0) {
  368                 uio.uio_iov = NULL;
  369                 uio.uio_resid = roundup2(data_len, 4);
  370                 if (icc->ic.ic_data_crc32c)
  371                         uio.uio_resid += ISCSI_DATA_DIGEST_SIZE;
  372 
  373                 error = soreceive(so, NULL, &uio, &m, NULL, NULL);
  374                 if (error != 0) {
  375                         ICL_WARN("failed to read data payload from "
  376                             "pre-offload PDU: %d", error);
  377                         return (NULL);
  378                 }
  379 
  380                 if (icc->ic.ic_data_crc32c) {
  381                         m_copydata(m, roundup2(data_len, 4),
  382                             sizeof(wire_digest), (caddr_t)&wire_digest);
  383 
  384                         calc_digest = 0xffffffff;
  385                         m_apply(m, 0, roundup2(data_len, 4), mbuf_crc32c_helper,
  386                             &calc_digest);
  387                         calc_digest ^= 0xffffffff;
  388                         if (calc_digest != wire_digest) {
  389                                 ICL_WARN("received pre-offload PDU 0x%02x "
  390                                     "with invalid data digest (0x%x vs 0x%x)",
  391                                     bhs.bhs_opcode, wire_digest, calc_digest);
  392                                 toep->ofld_rxq->rx_iscsi_data_digest_errors++;
  393                                 m_freem(m);
  394                                 return (NULL);
  395                         }
  396                 }
  397         }
  398 
  399         ip = icl_cxgbei_new_pdu(M_WAITOK);
  400         icl_cxgbei_new_pdu_set_conn(ip, &icc->ic);
  401         *ip->ip_bhs = bhs;
  402         ip->ip_data_len = data_len;
  403         ip->ip_data_mbuf = m;
  404         return (ip);
  405 }
  406 
  407 void
  408 parse_pdus(struct icl_cxgbei_conn *icc, struct sockbuf *sb)
  409 {
  410         struct icl_conn *ic = &icc->ic;
  411         struct socket *so = ic->ic_socket;
  412         struct toepcb *toep = icc->toep;
  413         struct icl_pdu *ip, *lastip;
  414         u_int total_len;
  415 
  416         SOCKBUF_LOCK_ASSERT(sb);
  417 
  418         CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__, toep->tid,
  419             sbused(sb));
  420 
  421         lastip = NULL;
  422         while (sbused(sb) != 0 && (sb->sb_state & SBS_CANTRCVMORE) == 0) {
  423                 total_len = sbused(sb);
  424                 SOCKBUF_UNLOCK(sb);
  425 
  426                 ip = parse_pdu(so, toep, icc, sb, total_len);
  427 
  428                 if (ip == NULL) {
  429                         ic->ic_error(ic);
  430                         SOCKBUF_LOCK(sb);
  431                         return;
  432                 }
  433 
  434                 if (lastip == NULL)
  435                         STAILQ_INSERT_HEAD(&icc->rcvd_pdus, ip, ip_next);
  436                 else
  437                         STAILQ_INSERT_AFTER(&icc->rcvd_pdus, lastip, ip,
  438                             ip_next);
  439                 lastip = ip;
  440 
  441                 SOCKBUF_LOCK(sb);
  442         }
  443 }
  444 
  445 static int
  446 do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
  447 {
  448         struct adapter *sc = iq->adapter;
  449         const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
  450         u_int tid = GET_TID(cpl);
  451         struct toepcb *toep = lookup_tid(sc, tid);
  452         struct inpcb *inp = toep->inp;
  453         struct socket *so;
  454         struct sockbuf *sb;
  455         struct tcpcb *tp;
  456         struct icl_cxgbei_conn *icc;
  457         struct icl_conn *ic;
  458         struct icl_cxgbei_pdu *icp = toep->ulpcb2;
  459         struct icl_pdu *ip;
  460         u_int pdu_len, val;
  461         struct epoch_tracker et;
  462 
  463         MPASS(m == NULL);
  464 
  465         /* Must already be assembling a PDU. */
  466         MPASS(icp != NULL);
  467         MPASS(icp->icp_flags & ICPF_RX_HDR);    /* Data is optional. */
  468         MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);
  469 
  470         pdu_len = be16toh(cpl->len);    /* includes everything. */
  471         val = be32toh(cpl->ddpvld);
  472 
  473 #if 0
  474         CTR5(KTR_CXGBE,
  475             "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp_flags 0x%08x",
  476             __func__, tid, pdu_len, val, icp->icp_flags);
  477 #endif
  478 
  479         icp->icp_flags |= ICPF_RX_STATUS;
  480         ip = &icp->ip;
  481         if (val & F_DDP_PADDING_ERR) {
  482                 ICL_WARN("received PDU 0x%02x with invalid padding",
  483                     ip->ip_bhs->bhs_opcode);
  484                 toep->ofld_rxq->rx_iscsi_padding_errors++;
  485         }
  486         if (val & F_DDP_HDRCRC_ERR) {
  487                 ICL_WARN("received PDU 0x%02x with invalid header digest",
  488                     ip->ip_bhs->bhs_opcode);
  489                 toep->ofld_rxq->rx_iscsi_header_digest_errors++;
  490         }
  491         if (val & F_DDP_DATACRC_ERR) {
  492                 ICL_WARN("received PDU 0x%02x with invalid data digest",
  493                     ip->ip_bhs->bhs_opcode);
  494                 toep->ofld_rxq->rx_iscsi_data_digest_errors++;
  495         }
  496         if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
  497                 MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
  498                 MPASS(ip->ip_data_len > 0);
  499                 icp->icp_flags |= ICPF_RX_DDP;
  500                 toep->ofld_rxq->rx_iscsi_ddp_pdus++;
  501                 toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
  502         }
  503 
  504         INP_WLOCK(inp);
  505         if (__predict_false(inp->inp_flags & INP_DROPPED)) {
  506                 CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
  507                     __func__, tid, pdu_len, inp->inp_flags);
  508                 INP_WUNLOCK(inp);
  509                 icl_cxgbei_conn_pdu_free(NULL, ip);
  510                 toep->ulpcb2 = NULL;
  511                 return (0);
  512         }
  513 
  514         /*
  515          * T6+ does not report data PDUs received via DDP without F
  516          * set.  This can result in gaps in the TCP sequence space.
  517          */
  518         tp = intotcpcb(inp);
  519         MPASS(chip_id(sc) >= CHELSIO_T6 || icp->icp_seq == tp->rcv_nxt);
  520         tp->rcv_nxt = icp->icp_seq + pdu_len;
  521         tp->t_rcvtime = ticks;
  522 
  523         /*
  524          * Don't update the window size or return credits since RX
  525          * flow control is disabled.
  526          */
  527 
  528         so = inp->inp_socket;
  529         sb = &so->so_rcv;
  530         SOCKBUF_LOCK(sb);
  531 
  532         icc = toep->ulpcb;
  533         if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) {
  534                 CTR5(KTR_CXGBE,
  535                     "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
  536                     __func__, tid, pdu_len, icc, sb->sb_state);
  537                 SOCKBUF_UNLOCK(sb);
  538                 INP_WUNLOCK(inp);
  539 
  540                 CURVNET_SET(so->so_vnet);
  541                 NET_EPOCH_ENTER(et);
  542                 INP_WLOCK(inp);
  543                 tp = tcp_drop(tp, ECONNRESET);
  544                 if (tp)
  545                         INP_WUNLOCK(inp);
  546                 NET_EPOCH_EXIT(et);
  547                 CURVNET_RESTORE();
  548 
  549                 icl_cxgbei_conn_pdu_free(NULL, ip);
  550                 toep->ulpcb2 = NULL;
  551                 return (0);
  552         }
  553         MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
  554         ic = &icc->ic;
  555         if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
  556             F_DDP_DATACRC_ERR)) != 0) {
  557                 SOCKBUF_UNLOCK(sb);
  558                 INP_WUNLOCK(inp);
  559 
  560                 icl_cxgbei_conn_pdu_free(NULL, ip);
  561                 toep->ulpcb2 = NULL;
  562                 ic->ic_error(ic);
  563                 return (0);
  564         }
  565 
  566         icl_cxgbei_new_pdu_set_conn(ip, ic);
  567 
  568         STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
  569         if (!icc->rx_active) {
  570                 icc->rx_active = true;
  571                 wakeup(&icc->rx_active);
  572         }
  573         SOCKBUF_UNLOCK(sb);
  574         INP_WUNLOCK(inp);
  575 
  576         toep->ulpcb2 = NULL;
  577 
  578         return (0);
  579 }
  580 
  581 static int
  582 do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
  583 {
  584         struct epoch_tracker et;
  585         struct adapter *sc = iq->adapter;
  586         struct cpl_rx_iscsi_cmp *cpl = mtod(m, struct cpl_rx_iscsi_cmp *);
  587         u_int tid = GET_TID(cpl);
  588         struct toepcb *toep = lookup_tid(sc, tid);
  589         struct icl_cxgbei_pdu *icp = toep->ulpcb2;
  590         struct icl_pdu *ip;
  591         struct cxgbei_cmp *cmp;
  592         struct inpcb *inp = toep->inp;
  593 #ifdef INVARIANTS
  594         uint16_t len = be16toh(cpl->len);
  595         u_int data_digest_len;
  596 #endif
  597         struct socket *so;
  598         struct sockbuf *sb;
  599         struct tcpcb *tp;
  600         struct icl_cxgbei_conn *icc;
  601         struct icl_conn *ic;
  602         struct iscsi_bhs_data_out *bhsdo;
  603         u_int val = be32toh(cpl->ddpvld);
  604         u_int npdus, pdu_len;
  605         uint32_t prev_seg_len;
  606 
  607         M_ASSERTPKTHDR(m);
  608         MPASS(m->m_pkthdr.len == len + sizeof(*cpl));
  609 
  610         if ((val & F_DDP_PDU) == 0) {
  611                 MPASS(icp != NULL);
  612                 MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);
  613                 ip = &icp->ip;
  614         }
  615 
  616         if (icp == NULL) {
  617                 /* T6 completion enabled, start of a new PDU. */
  618                 ip = icl_cxgbei_new_pdu(M_NOWAIT);
  619                 if (ip == NULL)
  620                         CXGBE_UNIMPLEMENTED("PDU allocation failure");
  621                 icp = ip_to_icp(ip);
  622         }
  623         pdu_len = G_ISCSI_PDU_LEN(be16toh(cpl->pdu_len_ddp));
  624 
  625 #if 0
  626         CTR5(KTR_CXGBE,
  627             "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp %p",
  628             __func__, tid, pdu_len, val, icp);
  629 #endif
  630 
  631         /* Copy header */
  632         m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
  633         bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
  634         ip->ip_data_len = bhsdo->bhsdo_data_segment_len[0] << 16 |
  635             bhsdo->bhsdo_data_segment_len[1] << 8 |
  636             bhsdo->bhsdo_data_segment_len[2];
  637         icp->icp_seq = ntohl(cpl->seq);
  638         icp->icp_flags |= ICPF_RX_HDR;
  639         icp->icp_flags |= ICPF_RX_STATUS;
  640 
  641         if (val & F_DDP_PADDING_ERR) {
  642                 ICL_WARN("received PDU 0x%02x with invalid padding",
  643                     ip->ip_bhs->bhs_opcode);
  644                 toep->ofld_rxq->rx_iscsi_padding_errors++;
  645         }
  646         if (val & F_DDP_HDRCRC_ERR) {
  647                 ICL_WARN("received PDU 0x%02x with invalid header digest",
  648                     ip->ip_bhs->bhs_opcode);
  649                 toep->ofld_rxq->rx_iscsi_header_digest_errors++;
  650         }
  651         if (val & F_DDP_DATACRC_ERR) {
  652                 ICL_WARN("received PDU 0x%02x with invalid data digest",
  653                     ip->ip_bhs->bhs_opcode);
  654                 toep->ofld_rxq->rx_iscsi_data_digest_errors++;
  655         }
  656 
  657         INP_WLOCK(inp);
  658         if (__predict_false(inp->inp_flags & INP_DROPPED)) {
  659                 CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
  660                     __func__, tid, pdu_len, inp->inp_flags);
  661                 INP_WUNLOCK(inp);
  662                 icl_cxgbei_conn_pdu_free(NULL, ip);
  663                 toep->ulpcb2 = NULL;
  664                 m_freem(m);
  665                 return (0);
  666         }
  667 
  668         tp = intotcpcb(inp);
  669 
  670         /*
  671          * If icc is NULL, the connection is being closed in
  672          * icl_cxgbei_conn_close(), just drop this data.
  673          */
  674         icc = toep->ulpcb;
  675         if (__predict_false(icc == NULL)) {
  676                 CTR4(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes), icc %p",
  677                     __func__, tid, pdu_len, icc);
  678 
  679                 /*
  680                  * Update rcv_nxt so the sequence number of the FIN
  681                  * doesn't appear wrong.
  682                  */
  683                 tp->rcv_nxt = icp->icp_seq + pdu_len;
  684                 tp->t_rcvtime = ticks;
  685                 INP_WUNLOCK(inp);
  686 
  687                 icl_cxgbei_conn_pdu_free(NULL, ip);
  688                 toep->ulpcb2 = NULL;
  689                 m_freem(m);
  690                 return (0);
  691         }
  692 
  693         MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
  694         ic = &icc->ic;
  695         if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
  696             F_DDP_DATACRC_ERR)) != 0) {
  697                 INP_WUNLOCK(inp);
  698 
  699                 icl_cxgbei_conn_pdu_free(NULL, ip);
  700                 toep->ulpcb2 = NULL;
  701                 m_freem(m);
  702                 ic->ic_error(ic);
  703                 return (0);
  704         }
  705 
  706 #ifdef INVARIANTS
  707         data_digest_len = (icc->ulp_submode & ULP_CRC_DATA) ?
  708             ISCSI_DATA_DIGEST_SIZE : 0;
  709         MPASS(roundup2(ip->ip_data_len, 4) == pdu_len - len - data_digest_len);
  710 #endif
  711 
  712         if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
  713                 MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
  714                 MPASS(ip->ip_data_len > 0);
  715                 icp->icp_flags |= ICPF_RX_DDP;
  716                 bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
  717 
  718                 switch (ip->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) {
  719                 case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
  720                         cmp = cxgbei_find_cmp(icc,
  721                             be32toh(bhsdo->bhsdo_initiator_task_tag));
  722                         break;
  723                 case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
  724                         cmp = cxgbei_find_cmp(icc,
  725                             be32toh(bhsdo->bhsdo_target_transfer_tag));
  726                         break;
  727                 default:
  728                         __assert_unreachable();
  729                 }
  730                 MPASS(cmp != NULL);
  731 
  732                 /*
  733                  * The difference between the end of the last burst
  734                  * and the offset of the last PDU in this burst is
  735                  * the additional data received via DDP.
  736                  */
  737                 prev_seg_len = be32toh(bhsdo->bhsdo_buffer_offset) -
  738                     cmp->next_buffer_offset;
  739 
  740                 if (prev_seg_len != 0) {
  741                         uint32_t orig_datasn;
  742 
  743                         /*
  744                          * Return a "large" PDU representing the burst
  745                          * of PDUs.  Adjust the offset and length of
  746                          * this PDU to represent the entire burst.
  747                          */
  748                         ip->ip_data_len += prev_seg_len;
  749                         bhsdo->bhsdo_data_segment_len[2] = ip->ip_data_len;
  750                         bhsdo->bhsdo_data_segment_len[1] = ip->ip_data_len >> 8;
  751                         bhsdo->bhsdo_data_segment_len[0] = ip->ip_data_len >> 16;
  752                         bhsdo->bhsdo_buffer_offset =
  753                             htobe32(cmp->next_buffer_offset);
  754 
  755                         orig_datasn = htobe32(bhsdo->bhsdo_datasn);
  756                         npdus = orig_datasn - cmp->last_datasn;
  757                         bhsdo->bhsdo_datasn = htobe32(cmp->last_datasn + 1);
  758                         cmp->last_datasn = orig_datasn;
  759                         ip->ip_additional_pdus = npdus - 1;
  760                 } else {
  761                         MPASS(htobe32(bhsdo->bhsdo_datasn) ==
  762                             cmp->last_datasn + 1);
  763                         npdus = 1;
  764                         cmp->last_datasn = htobe32(bhsdo->bhsdo_datasn);
  765                 }
  766 
  767                 cmp->next_buffer_offset += ip->ip_data_len;
  768                 toep->ofld_rxq->rx_iscsi_ddp_pdus += npdus;
  769                 toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
  770         } else {
  771                 MPASS(icp->icp_flags & (ICPF_RX_FLBUF));
  772                 MPASS(ip->ip_data_len == ip->ip_data_mbuf->m_pkthdr.len);
  773         }
  774 
  775         tp->rcv_nxt = icp->icp_seq + pdu_len;
  776         tp->t_rcvtime = ticks;
  777 
  778         /*
  779          * Don't update the window size or return credits since RX
  780          * flow control is disabled.
  781          */
  782 
  783         so = inp->inp_socket;
  784         sb = &so->so_rcv;
  785         SOCKBUF_LOCK(sb);
  786         if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
  787                 CTR5(KTR_CXGBE,
  788                     "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
  789                     __func__, tid, pdu_len, icc, sb->sb_state);
  790                 SOCKBUF_UNLOCK(sb);
  791                 INP_WUNLOCK(inp);
  792 
  793                 CURVNET_SET(so->so_vnet);
  794                 NET_EPOCH_ENTER(et);
  795                 INP_WLOCK(inp);
  796                 tp = tcp_drop(tp, ECONNRESET);
  797                 if (tp != NULL)
  798                         INP_WUNLOCK(inp);
  799                 NET_EPOCH_EXIT(et);
  800                 CURVNET_RESTORE();
  801 
  802                 icl_cxgbei_conn_pdu_free(NULL, ip);
  803                 toep->ulpcb2 = NULL;
  804                 m_freem(m);
  805                 return (0);
  806         }
  807 
  808         icl_cxgbei_new_pdu_set_conn(ip, ic);
  809 
  810         /* Enqueue the PDU to the received pdus queue. */
  811         STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
  812         if (!icc->rx_active) {
  813                 icc->rx_active = true;
  814                 wakeup(&icc->rx_active);
  815         }
  816         SOCKBUF_UNLOCK(sb);
  817         INP_WUNLOCK(inp);
  818 
  819         toep->ulpcb2 = NULL;
  820         m_freem(m);
  821 
  822         return (0);
  823 }
  824 
  825 static int
  826 cxgbei_activate(struct adapter *sc)
  827 {
  828         struct cxgbei_data *ci;
  829         int rc;
  830 
  831         ASSERT_SYNCHRONIZED_OP(sc);
  832 
  833         if (uld_active(sc, ULD_ISCSI)) {
  834                 KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p",
  835                     __func__, sc));
  836                 return (0);
  837         }
  838 
  839         if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) {
  840                 device_printf(sc->dev,
  841                     "not iSCSI offload capable, or capability disabled.\n");
  842                 return (ENOSYS);
  843         }
  844 
  845         /* per-adapter softc for iSCSI */
  846         ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_WAITOK);
  847         if (ci == NULL)
  848                 return (ENOMEM);
  849 
  850         rc = cxgbei_init(sc, ci);
  851         if (rc != 0) {
  852                 free(ci, M_CXGBE);
  853                 return (rc);
  854         }
  855 
  856         sc->iscsi_ulp_softc = ci;
  857 
  858         return (0);
  859 }
  860 
  861 static int
  862 cxgbei_deactivate(struct adapter *sc)
  863 {
  864         struct cxgbei_data *ci = sc->iscsi_ulp_softc;
  865 
  866         ASSERT_SYNCHRONIZED_OP(sc);
  867 
  868         if (ci != NULL) {
  869                 sysctl_ctx_free(&ci->ctx);
  870                 t4_free_ppod_region(&ci->pr);
  871                 free(ci, M_CXGBE);
  872                 sc->iscsi_ulp_softc = NULL;
  873         }
  874 
  875         return (0);
  876 }
  877 
  878 static void
  879 cxgbei_activate_all(struct adapter *sc, void *arg __unused)
  880 {
  881 
  882         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0)
  883                 return;
  884 
  885         /* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. */
  886         if (sc->offload_map && !uld_active(sc, ULD_ISCSI))
  887                 (void) t4_activate_uld(sc, ULD_ISCSI);
  888 
  889         end_synchronized_op(sc, 0);
  890 }
  891 
  892 static void
  893 cxgbei_deactivate_all(struct adapter *sc, void *arg __unused)
  894 {
  895 
  896         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0)
  897                 return;
  898 
  899         if (uld_active(sc, ULD_ISCSI))
  900             (void) t4_deactivate_uld(sc, ULD_ISCSI);
  901 
  902         end_synchronized_op(sc, 0);
  903 }
  904 
  905 static struct uld_info cxgbei_uld_info = {
  906         .uld_id = ULD_ISCSI,
  907         .activate = cxgbei_activate,
  908         .deactivate = cxgbei_deactivate,
  909 };
  910 
  911 static int
  912 cxgbei_mod_load(void)
  913 {
  914         int rc;
  915 
  916         t4_register_cpl_handler(CPL_ISCSI_HDR, do_rx_iscsi_hdr);
  917         t4_register_cpl_handler(CPL_ISCSI_DATA, do_rx_iscsi_data);
  918         t4_register_cpl_handler(CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);
  919         t4_register_cpl_handler(CPL_RX_ISCSI_CMP, do_rx_iscsi_cmp);
  920 
  921         rc = t4_register_uld(&cxgbei_uld_info);
  922         if (rc != 0)
  923                 return (rc);
  924 
  925         t4_iterate(cxgbei_activate_all, NULL);
  926 
  927         return (rc);
  928 }
  929 
  930 static int
  931 cxgbei_mod_unload(void)
  932 {
  933 
  934         t4_iterate(cxgbei_deactivate_all, NULL);
  935 
  936         if (t4_unregister_uld(&cxgbei_uld_info) == EBUSY)
  937                 return (EBUSY);
  938 
  939         t4_register_cpl_handler(CPL_ISCSI_HDR, NULL);
  940         t4_register_cpl_handler(CPL_ISCSI_DATA, NULL);
  941         t4_register_cpl_handler(CPL_RX_ISCSI_DDP, NULL);
  942         t4_register_cpl_handler(CPL_RX_ISCSI_CMP, NULL);
  943 
  944         return (0);
  945 }
  946 #endif
  947 
  948 static int
  949 cxgbei_modevent(module_t mod, int cmd, void *arg)
  950 {
  951         int rc = 0;
  952 
  953 #ifdef TCP_OFFLOAD
  954         switch (cmd) {
  955         case MOD_LOAD:
  956                 rc = cxgbei_mod_load();
  957                 if (rc == 0)
  958                         rc = icl_cxgbei_mod_load();
  959                 break;
  960 
  961         case MOD_UNLOAD:
  962                 rc = icl_cxgbei_mod_unload();
  963                 if (rc == 0)
  964                         rc = cxgbei_mod_unload();
  965                 break;
  966 
  967         default:
  968                 rc = EINVAL;
  969         }
  970 #else
  971         printf("cxgbei: compiled without TCP_OFFLOAD support.\n");
  972         rc = EOPNOTSUPP;
  973 #endif
  974 
  975         return (rc);
  976 }
  977 
  978 static moduledata_t cxgbei_mod = {
  979         "cxgbei",
  980         cxgbei_modevent,
  981         NULL,
  982 };
  983 
  984 MODULE_VERSION(cxgbei, 1);
  985 DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY);
  986 MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1);
  987 MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1);
  988 MODULE_DEPEND(cxgbei, icl, 1, 1, 1);

Cache object: b17f7109f77dd67b2812d50c55e53f49


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.