FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgbe/cxgbei/icl_cxgbei.c


/*-
 * Copyright (c) 2012 The FreeBSD Foundation
 * Copyright (c) 2015 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * cxgbei implementation of iSCSI Common Layer kobj(9) interface.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/bio.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <machine/bus.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>

#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_debug.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_ioctl.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_xpt.h>
#include <cam/cam_debug.h>
#include <cam/cam_sim.h>
#include <cam/cam_xpt_sim.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_periph.h>
#include <cam/cam_compat.h>
#include <cam/scsi/scsi_message.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom.h"
#include "cxgbei.h"

/*
 * Use the page pod tag for the TT hash.
 */
#define TT_HASH(icc, tt)        (G_PPOD_TAG(tt) & (icc)->cmp_hash_mask)
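
/*
 * A transfer tag allocated by the DDP code below (prsv_tag) embeds a page
 * pod tag; G_PPOD_TAG() recovers it, and the low bits, masked with
 * cmp_hash_mask, select a bucket in icc->cmp_table.  This is what makes
 * cxgbei_find_cmp() further down an O(1) lookup on average.
 */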

struct cxgbei_ddp_state {
        struct ppod_reservation prsv;
        struct cxgbei_cmp cmp;
};

static MALLOC_DEFINE(M_CXGBEI, "cxgbei", "cxgbei(4)");

SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Chelsio iSCSI offload");
static int first_burst_length = 8192;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, first_burst_length, CTLFLAG_RWTUN,
    &first_burst_length, 0, "First burst length");
static int max_burst_length = 2 * 1024 * 1024;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, max_burst_length, CTLFLAG_RWTUN,
    &max_burst_length, 0, "Maximum burst length");
static int sendspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");
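
/*
 * All four knobs are CTLFLAG_RWTUN, so they may be set as loader tunables
 * or adjusted at runtime, e.g. (sketch):
 *
 *   # sysctl kern.icl.cxgbei.first_burst_length=65536
 *
 * sendspace/recvspace are applied by icl_cxgbei_setsockopt() when a
 * connection is handed off to the driver.
 */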

static volatile u_int icl_cxgbei_ncons;

static icl_conn_new_pdu_t       icl_cxgbei_conn_new_pdu;
static icl_conn_pdu_data_segment_length_t
                                    icl_cxgbei_conn_pdu_data_segment_length;
static icl_conn_pdu_append_bio_t        icl_cxgbei_conn_pdu_append_bio;
static icl_conn_pdu_append_data_t       icl_cxgbei_conn_pdu_append_data;
static icl_conn_pdu_get_bio_t   icl_cxgbei_conn_pdu_get_bio;
static icl_conn_pdu_get_data_t  icl_cxgbei_conn_pdu_get_data;
static icl_conn_pdu_queue_t     icl_cxgbei_conn_pdu_queue;
static icl_conn_pdu_queue_cb_t  icl_cxgbei_conn_pdu_queue_cb;
static icl_conn_handoff_t       icl_cxgbei_conn_handoff;
static icl_conn_free_t          icl_cxgbei_conn_free;
static icl_conn_close_t         icl_cxgbei_conn_close;
static icl_conn_task_setup_t    icl_cxgbei_conn_task_setup;
static icl_conn_task_done_t     icl_cxgbei_conn_task_done;
static icl_conn_transfer_setup_t        icl_cxgbei_conn_transfer_setup;
static icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done;

static kobj_method_t icl_cxgbei_methods[] = {
        KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu),
        KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free),
        KOBJMETHOD(icl_conn_pdu_data_segment_length,
            icl_cxgbei_conn_pdu_data_segment_length),
        KOBJMETHOD(icl_conn_pdu_append_bio, icl_cxgbei_conn_pdu_append_bio),
        KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data),
        KOBJMETHOD(icl_conn_pdu_get_bio, icl_cxgbei_conn_pdu_get_bio),
        KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data),
        KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue),
        KOBJMETHOD(icl_conn_pdu_queue_cb, icl_cxgbei_conn_pdu_queue_cb),
        KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff),
        KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free),
        KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close),
        KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup),
        KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done),
        KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup),
        KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done),
        { 0, 0 }
};

DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn));
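
/*
 * A connection created by icl_cxgbei_new_conn() below is a kobj(9) instance
 * of this class, so calls made by the ICL core through the icl_conn_*
 * methods are dispatched to the icl_cxgbei_* implementations above.
 * DEFINE_CLASS() sizes each instance to hold a full struct icl_cxgbei_conn,
 * which embeds the generic struct icl_conn (see the ic = &icc->ic
 * assignment in icl_cxgbei_new_conn()).
 */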

void
icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
        struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

        KASSERT(icp->ref_cnt != 0, ("freeing deleted PDU"));
        MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
        MPASS(ic == ip->ip_conn);

        m_freem(ip->ip_ahs_mbuf);
        m_freem(ip->ip_data_mbuf);
        m_freem(ip->ip_bhs_mbuf);

        KASSERT(ic != NULL || icp->ref_cnt == 1,
            ("orphaned PDU has outstanding references"));

        if (atomic_fetchadd_int(&icp->ref_cnt, -1) != 1)
                return;

        free(icp, M_CXGBEI);
#ifdef DIAGNOSTIC
        if (__predict_true(ic != NULL))
                refcount_release(&ic->ic_outstanding_pdus);
#endif
}

static void
icl_cxgbei_pdu_call_cb(struct icl_pdu *ip)
{
        struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

        MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);

        if (icp->cb != NULL)
                icp->cb(ip, icp->error);
#ifdef DIAGNOSTIC
        if (__predict_true(ip->ip_conn != NULL))
                refcount_release(&ip->ip_conn->ic_outstanding_pdus);
#endif
        free(icp, M_CXGBEI);
}

static void
icl_cxgbei_pdu_done(struct icl_pdu *ip, int error)
{
        struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

        if (error != 0)
                icp->error = error;

        m_freem(ip->ip_ahs_mbuf);
        ip->ip_ahs_mbuf = NULL;
        m_freem(ip->ip_data_mbuf);
        ip->ip_data_mbuf = NULL;
        m_freem(ip->ip_bhs_mbuf);
        ip->ip_bhs_mbuf = NULL;

        /*
         * All other references to this PDU should have been dropped
         * by the m_freem() of ip_data_mbuf.
         */
        if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1)
                icl_cxgbei_pdu_call_cb(ip);
        else
                __assert_unreachable();
}

static void
icl_cxgbei_mbuf_done(struct mbuf *mb)
{

        struct icl_cxgbei_pdu *icp = (struct icl_cxgbei_pdu *)mb->m_ext.ext_arg1;

        /*
         * NB: mb_free_mext() might leave ref_cnt as 1 without
         * decrementing it if it hits the fast path in the ref_cnt
         * check.
         */
        icl_cxgbei_pdu_call_cb(&icp->ip);
}

struct icl_pdu *
icl_cxgbei_new_pdu(int flags)
{
        struct icl_cxgbei_pdu *icp;
        struct icl_pdu *ip;
        struct mbuf *m;

        icp = malloc(sizeof(*icp), M_CXGBEI, flags | M_ZERO);
        if (__predict_false(icp == NULL))
                return (NULL);

        icp->icp_signature = CXGBEI_PDU_SIGNATURE;
        icp->ref_cnt = 1;
        ip = &icp->ip;

        m = m_gethdr(flags, MT_DATA);
        if (__predict_false(m == NULL)) {
                free(icp, M_CXGBEI);
                return (NULL);
        }

        ip->ip_bhs_mbuf = m;
        ip->ip_bhs = mtod(m, struct iscsi_bhs *);
        memset(ip->ip_bhs, 0, sizeof(*ip->ip_bhs));
        m->m_len = sizeof(struct iscsi_bhs);
        m->m_pkthdr.len = m->m_len;

        return (ip);
}

void
icl_cxgbei_new_pdu_set_conn(struct icl_pdu *ip, struct icl_conn *ic)
{

        ip->ip_conn = ic;
#ifdef DIAGNOSTIC
        refcount_acquire(&ic->ic_outstanding_pdus);
#endif
}

/*
 * Allocate an icl_pdu with an empty BHS to be filled in by the caller.
 */
static struct icl_pdu *
icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags)
{
        struct icl_pdu *ip;

        ip = icl_cxgbei_new_pdu(flags);
        if (__predict_false(ip == NULL))
                return (NULL);
        icl_cxgbei_new_pdu_set_conn(ip, ic);

        return (ip);
}
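
/*
 * Typical PDU lifecycle from the ICL consumer's point of view (a sketch,
 * assuming the usual icl(4) wrappers): allocate with icl_pdu_new(), which
 * dispatches here; fill in ip->ip_bhs; attach data with
 * icl_pdu_append_data(); then icl_pdu_queue() lands in
 * icl_cxgbei_conn_pdu_queue_cb() below, and the TX thread finalizes the
 * PDU and pushes it to the adapter.
 */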

static size_t
icl_pdu_data_segment_length(const struct icl_pdu *request)
{
        uint32_t len = 0;

        len += request->ip_bhs->bhs_data_segment_len[0];
        len <<= 8;
        len += request->ip_bhs->bhs_data_segment_len[1];
        len <<= 8;
        len += request->ip_bhs->bhs_data_segment_len[2];

        return (len);
}
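
/*
 * DataSegmentLength is a 24-bit big-endian field in the BHS.  For example,
 * bytes { 0x00, 0x10, 0x00 } decode to (0x00 << 16) | (0x10 << 8) | 0x00 =
 * 4096.
 */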

size_t
icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{

        return (icl_pdu_data_segment_length(request));
}

static struct mbuf *
finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp)
{
        struct icl_pdu *ip = &icp->ip;
        uint8_t ulp_submode, padding;
        struct mbuf *m, *last;
        struct iscsi_bhs *bhs;
        int data_len;

        /*
         * Fix up the data segment mbuf first.
         */
        m = ip->ip_data_mbuf;
        ulp_submode = icc->ulp_submode;
        if (m != NULL) {
                last = m_last(m);

                /*
                 * Round up the data segment to a 4B boundary.  Pad with 0 if
                 * necessary.  There will definitely be room in the mbuf.
                 */
                padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len;
                if (padding != 0) {
                        MPASS(padding <= M_TRAILINGSPACE(last));
                        bzero(mtod(last, uint8_t *) + last->m_len, padding);
                        last->m_len += padding;
                }
        } else {
                MPASS(ip->ip_data_len == 0);
                ulp_submode &= ~ULP_CRC_DATA;
                padding = 0;
        }

        /*
         * Now the header mbuf that has the BHS.
         */
        m = ip->ip_bhs_mbuf;
        MPASS(m->m_pkthdr.len == sizeof(struct iscsi_bhs));
        MPASS(m->m_len == sizeof(struct iscsi_bhs));

        bhs = ip->ip_bhs;
        data_len = ip->ip_data_len;
        if (data_len > icc->ic.ic_max_send_data_segment_length) {
                struct iscsi_bhs_data_in *bhsdi;
                int flags;

                KASSERT(padding == 0, ("%s: ISO with padding %d for icp %p",
                    __func__, padding, icp));
                switch (bhs->bhs_opcode) {
                case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
                        flags = 1;
                        break;
                case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
                        flags = 2;
                        break;
                default:
                        panic("invalid opcode %#x for ISO", bhs->bhs_opcode);
                }
                data_len = icc->ic.ic_max_send_data_segment_length;
                bhsdi = (struct iscsi_bhs_data_in *)bhs;
                if (bhsdi->bhsdi_flags & BHSDI_FLAGS_F) {
                        /*
                         * Firmware will set F on the final PDU in the
                         * burst.
                         */
                        flags |= CXGBE_ISO_F;
                        bhsdi->bhsdi_flags &= ~BHSDI_FLAGS_F;
                }
                set_mbuf_iscsi_iso(m, true);
                set_mbuf_iscsi_iso_flags(m, flags);
                set_mbuf_iscsi_iso_mss(m, data_len);
        }

        bhs->bhs_data_segment_len[2] = data_len;
        bhs->bhs_data_segment_len[1] = data_len >> 8;
        bhs->bhs_data_segment_len[0] = data_len >> 16;

        /*
         * Extract mbuf chain from PDU.
         */
        m->m_pkthdr.len += ip->ip_data_len + padding;
        m->m_next = ip->ip_data_mbuf;
        set_mbuf_ulp_submode(m, ulp_submode);
        ip->ip_bhs_mbuf = NULL;
        ip->ip_data_mbuf = NULL;
        ip->ip_bhs = NULL;

        /*
         * Drop PDU reference on icp.  Additional references might
         * still be held by zero-copy PDU buffers (ICL_NOCOPY).
         */
        if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1)
                icl_cxgbei_pdu_call_cb(ip);

        return (m);
}
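
/*
 * ISO (iSCSI segmentation offload) example, with assumed numbers: if
 * ic_max_send_data_segment_length is 8192 and a Data-In PDU carries 64KB,
 * finalize_pdu() marks the chain as ISO with an MSS of 8192 and the
 * adapter emits eight PDUs on the wire, setting the F flag only on the
 * final one.
 */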

static void
icl_cxgbei_tx_main(void *arg)
{
        struct epoch_tracker et;
        struct icl_cxgbei_conn *icc = arg;
        struct icl_conn *ic = &icc->ic;
        struct toepcb *toep = icc->toep;
        struct socket *so = ic->ic_socket;
        struct inpcb *inp = sotoinpcb(so);
        struct icl_pdu *ip;
        struct mbuf *m;
        struct mbufq mq;
        STAILQ_HEAD(, icl_pdu) tx_pdus = STAILQ_HEAD_INITIALIZER(tx_pdus);

        mbufq_init(&mq, INT_MAX);

        ICL_CONN_LOCK(ic);
        while (__predict_true(!ic->ic_disconnecting)) {
                while (STAILQ_EMPTY(&icc->sent_pdus)) {
                        icc->tx_active = false;
                        mtx_sleep(&icc->tx_active, ic->ic_lock, 0, "-", 0);
                        if (__predict_false(ic->ic_disconnecting))
                                goto out;
                        MPASS(icc->tx_active);
                }

                STAILQ_SWAP(&icc->sent_pdus, &tx_pdus, icl_pdu);
                ICL_CONN_UNLOCK(ic);

                while ((ip = STAILQ_FIRST(&tx_pdus)) != NULL) {
                        STAILQ_REMOVE_HEAD(&tx_pdus, ip_next);

                        m = finalize_pdu(icc, ip_to_icp(ip));
                        M_ASSERTPKTHDR(m);
                        MPASS((m->m_pkthdr.len & 3) == 0);

                        mbufq_enqueue(&mq, m);
                }

                ICL_CONN_LOCK(ic);
                if (__predict_false(ic->ic_disconnecting) ||
                    __predict_false(ic->ic_socket == NULL)) {
                        mbufq_drain(&mq);
                        break;
                }

                CURVNET_SET(toep->vnet);
                NET_EPOCH_ENTER(et);
                INP_WLOCK(inp);

                ICL_CONN_UNLOCK(ic);
                if (__predict_false(inp->inp_flags & INP_DROPPED) ||
                    __predict_false((toep->flags & TPF_ATTACHED) == 0)) {
                        mbufq_drain(&mq);
                } else {
                        mbufq_concat(&toep->ulp_pduq, &mq);
                        t4_push_pdus(icc->sc, toep, 0);
                }
                INP_WUNLOCK(inp);
                NET_EPOCH_EXIT(et);
                CURVNET_RESTORE();

                ICL_CONN_LOCK(ic);
        }
out:
        ICL_CONN_UNLOCK(ic);

        kthread_exit();
}

static void
icl_cxgbei_rx_main(void *arg)
{
        struct icl_cxgbei_conn *icc = arg;
        struct icl_conn *ic = &icc->ic;
        struct icl_pdu *ip;
        struct sockbuf *sb;
        STAILQ_HEAD(, icl_pdu) rx_pdus = STAILQ_HEAD_INITIALIZER(rx_pdus);
        bool cantrcvmore;

        sb = &ic->ic_socket->so_rcv;
        SOCKBUF_LOCK(sb);
        while (__predict_true(!ic->ic_disconnecting)) {
                while (STAILQ_EMPTY(&icc->rcvd_pdus)) {
                        icc->rx_active = false;
                        mtx_sleep(&icc->rx_active, SOCKBUF_MTX(sb), 0, "-", 0);
                        if (__predict_false(ic->ic_disconnecting))
                                goto out;
                        MPASS(icc->rx_active);
                }

                if (__predict_false(sbused(sb) != 0)) {
                        /*
                         * PDUs were received before the tid
                         * transitioned to ULP mode.  Convert
                         * them to icl_cxgbei_pdus and insert
                         * them into the head of rcvd_pdus.
                         */
                        parse_pdus(icc, sb);
                }
                cantrcvmore = (sb->sb_state & SBS_CANTRCVMORE) != 0;
                MPASS(STAILQ_EMPTY(&rx_pdus));
                STAILQ_SWAP(&icc->rcvd_pdus, &rx_pdus, icl_pdu);
                SOCKBUF_UNLOCK(sb);

                /* Hand over PDUs to ICL. */
                while ((ip = STAILQ_FIRST(&rx_pdus)) != NULL) {
                        STAILQ_REMOVE_HEAD(&rx_pdus, ip_next);
                        if (cantrcvmore)
                                icl_cxgbei_pdu_done(ip, ENOTCONN);
                        else
                                ic->ic_receive(ip);
                }

                SOCKBUF_LOCK(sb);
        }
out:
        /*
         * Since ic_disconnecting is set before the SOCKBUF_MTX is
         * locked in icl_cxgbei_conn_close, the loop above can exit
         * before icl_cxgbei_conn_close can lock SOCKBUF_MTX and block
         * waiting for the thread exit.
         */
        while (!icc->rx_exiting)
                mtx_sleep(&icc->rx_active, SOCKBUF_MTX(sb), 0, "-", 0);
        SOCKBUF_UNLOCK(sb);

        kthread_exit();
}
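
/*
 * Each offloaded connection thus gets a dedicated TX and RX kthread (see
 * icl_cxgbei_conn_handoff()).  Producers only queue PDUs and wakeup() the
 * thread: the TX side batches sent_pdus onto the toep's ulp_pduq under the
 * inp lock, while the RX side drains rcvd_pdus, filled by the receive path
 * under the socket-buffer lock, and delivers each PDU via ic->ic_receive().
 */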

static void
cxgbei_free_mext_pg(struct mbuf *m)
{
        struct icl_cxgbei_pdu *icp;

        M_ASSERTEXTPG(m);

        /*
         * Nothing to do for the pages; they are owned by the PDU /
         * I/O request.
         */

        /* Drop reference on the PDU. */
        icp = m->m_ext.ext_arg1;
        if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1)
                icl_cxgbei_pdu_call_cb(&icp->ip);
}

static struct mbuf *
cxgbei_getm(size_t len, int flags)
{
        struct mbuf *m, *m0, *m_tail;

        m_tail = m0 = NULL;

        /* Allocate as jumbo mbufs of size MJUM16BYTES. */
        while (len >= MJUM16BYTES) {
                m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES);
                if (__predict_false(m == NULL)) {
                        if ((flags & M_WAITOK) != 0) {
                                /* Fall back to non-jumbo mbufs. */
                                break;
                        }
                        return (NULL);
                }
                if (m0 == NULL) {
                        m0 = m_tail = m;
                } else {
                        m_tail->m_next = m;
                        m_tail = m;
                }
                len -= MJUM16BYTES;
        }

        /* Allocate mbuf chain for the remaining data. */
        if (len != 0) {
                m = m_getm2(NULL, len, flags, MT_DATA, 0);
                if (__predict_false(m == NULL)) {
                        m_freem(m0);
                        return (NULL);
                }
                if (m0 == NULL)
                        m0 = m;
                else
                        m_tail->m_next = m;
        }

        return (m0);
}
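
/*
 * Worked example: cxgbei_getm(40960, M_WAITOK) tries two 16KB jumbo mbufs
 * (MJUM16BYTES) for the first 32KB and an m_getm2() chain for the
 * remaining 8KB.  Note the jumbo allocations are always M_NOWAIT; if one
 * fails and M_WAITOK was passed, the whole remainder falls back to the
 * m_getm2() path (which may sleep) instead of failing outright.
 */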

int
icl_cxgbei_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *ip,
    struct bio *bp, size_t offset, size_t len, int flags)
{
        struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
        struct mbuf *m, *m_tail;
        vm_offset_t vaddr;
        size_t page_offset, todo, mtodo;
        boolean_t mapped;
        int i;

        MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
        MPASS(ic == ip->ip_conn);
        KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len));

        m_tail = ip->ip_data_mbuf;
        if (m_tail != NULL)
                for (; m_tail->m_next != NULL; m_tail = m_tail->m_next)
                        ;

        MPASS(bp->bio_flags & BIO_UNMAPPED);
        if (offset < PAGE_SIZE - bp->bio_ma_offset) {
                page_offset = bp->bio_ma_offset + offset;
                i = 0;
        } else {
                offset -= PAGE_SIZE - bp->bio_ma_offset;
                for (i = 1; offset >= PAGE_SIZE; i++)
                        offset -= PAGE_SIZE;
                page_offset = offset;
        }

        if (flags & ICL_NOCOPY) {
                m = NULL;
                while (len > 0) {
                        if (m == NULL) {
                                m = mb_alloc_ext_pgs(flags & ~ICL_NOCOPY,
                                    cxgbei_free_mext_pg);
                                if (__predict_false(m == NULL))
                                        return (ENOMEM);
                                atomic_add_int(&icp->ref_cnt, 1);
                                m->m_ext.ext_arg1 = icp;
                                m->m_epg_1st_off = page_offset;
                        }

                        todo = MIN(len, PAGE_SIZE - page_offset);

                        m->m_epg_pa[m->m_epg_npgs] =
                            VM_PAGE_TO_PHYS(bp->bio_ma[i]);
                        m->m_epg_npgs++;
                        m->m_epg_last_len = todo;
                        m->m_len += todo;
                        m->m_ext.ext_size += PAGE_SIZE;
                        MBUF_EXT_PGS_ASSERT_SANITY(m);

                        if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) {
                                if (m_tail != NULL)
                                        m_tail->m_next = m;
                                else
                                        ip->ip_data_mbuf = m;
                                m_tail = m;
                                ip->ip_data_len += m->m_len;
                                m = NULL;
                        }

                        page_offset = 0;
                        len -= todo;
                        i++;
                }

                if (m != NULL) {
                        if (m_tail != NULL)
                                m_tail->m_next = m;
                        else
                                ip->ip_data_mbuf = m;
                        ip->ip_data_len += m->m_len;
                }
                return (0);
        }

        m = cxgbei_getm(len, flags);
        if (__predict_false(m == NULL))
                return (ENOMEM);

        if (ip->ip_data_mbuf == NULL) {
                ip->ip_data_mbuf = m;
                ip->ip_data_len = len;
        } else {
                m_tail->m_next = m;
                ip->ip_data_len += len;
        }

        while (len > 0) {
                todo = MIN(len, PAGE_SIZE - page_offset);

                mapped = pmap_map_io_transient(bp->bio_ma + i, &vaddr, 1,
                    FALSE);

                do {
                        mtodo = min(todo, M_SIZE(m) - m->m_len);
                        memcpy(mtod(m, char *) + m->m_len, (char *)vaddr +
                            page_offset, mtodo);
                        m->m_len += mtodo;
                        if (m->m_len == M_SIZE(m))
                                m = m->m_next;
                        page_offset += mtodo;
                        todo -= mtodo;
                } while (todo > 0);

                if (__predict_false(mapped))
                        pmap_unmap_io_transient(bp->bio_ma + i, &vaddr, 1,
                            FALSE);

                page_offset = 0;
                len -= todo;
                i++;
        }

        MPASS(ip->ip_data_len <= max(ic->ic_max_send_data_segment_length,
            ic->ic_hw_isomax));

        return (0);
}

int
icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip,
    const void *addr, size_t len, int flags)
{
        struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
        struct mbuf *m, *m_tail;
        const char *src;

        MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
        MPASS(ic == ip->ip_conn);
        KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len));

        m_tail = ip->ip_data_mbuf;
        if (m_tail != NULL)
                for (; m_tail->m_next != NULL; m_tail = m_tail->m_next)
                        ;

        if (flags & ICL_NOCOPY) {
                m = m_get(flags & ~ICL_NOCOPY, MT_DATA);
                if (m == NULL) {
                        ICL_WARN("failed to allocate mbuf");
                        return (ENOMEM);
                }

                m->m_flags |= M_RDONLY;
                m_extaddref(m, __DECONST(char *, addr), len, &icp->ref_cnt,
                    icl_cxgbei_mbuf_done, icp, NULL);
                m->m_len = len;
                if (ip->ip_data_mbuf == NULL) {
                        ip->ip_data_mbuf = m;
                        ip->ip_data_len = len;
                } else {
                        m_tail->m_next = m;
                        m_tail = m_tail->m_next;
                        ip->ip_data_len += len;
                }

                return (0);
        }

        m = cxgbei_getm(len, flags);
        if (__predict_false(m == NULL))
                return (ENOMEM);

        if (ip->ip_data_mbuf == NULL) {
                ip->ip_data_mbuf = m;
                ip->ip_data_len = len;
        } else {
                m_tail->m_next = m;
                ip->ip_data_len += len;
        }
        src = (const char *)addr;
        for (; m != NULL; m = m->m_next) {
                m->m_len = min(len, M_SIZE(m));
                memcpy(mtod(m, void *), src, m->m_len);
                src += m->m_len;
                len -= m->m_len;
        }
        MPASS(len == 0);

        MPASS(ip->ip_data_len <= max(ic->ic_max_send_data_segment_length,
            ic->ic_hw_isomax));

        return (0);
}
  809 void
  810 icl_cxgbei_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip,
  811     size_t pdu_off, struct bio *bp, size_t bio_off, size_t len)
  812 {
  813         struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
  814         vm_offset_t vaddr;
  815         size_t page_offset, todo;
  816         boolean_t mapped;
  817         int i;
  818 
  819         if (icp->icp_flags & ICPF_RX_DDP)
  820                 return; /* data is DDP'ed, no need to copy */
  821 
  822         MPASS(bp->bio_flags & BIO_UNMAPPED);
  823         if (bio_off < PAGE_SIZE - bp->bio_ma_offset) {
  824                 page_offset = bp->bio_ma_offset + bio_off;
  825                 i = 0;
  826         } else {
  827                 bio_off -= PAGE_SIZE - bp->bio_ma_offset;
  828                 for (i = 1; bio_off >= PAGE_SIZE; i++)
  829                         bio_off -= PAGE_SIZE;
  830                 page_offset = bio_off;
  831         }
  832 
  833         while (len > 0) {
  834                 todo = MIN(len, PAGE_SIZE - page_offset);
  835 
  836                 mapped = pmap_map_io_transient(bp->bio_ma + i, &vaddr, 1,
  837                     FALSE);
  838                 m_copydata(ip->ip_data_mbuf, pdu_off, todo, (char *)vaddr +
  839                     page_offset);
  840                 if (__predict_false(mapped))
                        pmap_unmap_io_transient(bp->bio_ma + i, &vaddr, 1,
                            FALSE);

                page_offset = 0;
                pdu_off += todo;
                len -= todo;
                i++;
        }
}

void
icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
        struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

        if (icp->icp_flags & ICPF_RX_DDP)
                return; /* data is DDP'ed, no need to copy */
        m_copydata(ip->ip_data_mbuf, off, len, addr);
}

void
icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
        icl_cxgbei_conn_pdu_queue_cb(ic, ip, NULL);
}

void
icl_cxgbei_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip,
                             icl_pdu_cb cb)
{
        struct icl_cxgbei_conn *icc = ic_to_icc(ic);
        struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
        struct socket *so = ic->ic_socket;

        MPASS(ic == ip->ip_conn);
        MPASS(ip->ip_bhs_mbuf != NULL);
        /* The kernel doesn't generate PDUs with AHS. */
        MPASS(ip->ip_ahs_mbuf == NULL && ip->ip_ahs_len == 0);

        ICL_CONN_LOCK_ASSERT(ic);

        icp->cb = cb;

        /* NOTE: sowriteable without so_snd lock is a mostly harmless race. */
        if (ic->ic_disconnecting || so == NULL || !sowriteable(so)) {
                icl_cxgbei_pdu_done(ip, ENOTCONN);
                return;
        }

        STAILQ_INSERT_TAIL(&icc->sent_pdus, ip, ip_next);
        if (!icc->tx_active) {
                icc->tx_active = true;
                wakeup(&icc->tx_active);
        }
}

static struct icl_conn *
icl_cxgbei_new_conn(const char *name, struct mtx *lock)
{
        struct icl_cxgbei_conn *icc;
        struct icl_conn *ic;

        refcount_acquire(&icl_cxgbei_ncons);

        icc = (struct icl_cxgbei_conn *)kobj_create(&icl_cxgbei_class, M_CXGBE,
            M_WAITOK | M_ZERO);
        icc->icc_signature = CXGBEI_CONN_SIGNATURE;
        STAILQ_INIT(&icc->rcvd_pdus);
        STAILQ_INIT(&icc->sent_pdus);

        icc->cmp_table = hashinit(64, M_CXGBEI, &icc->cmp_hash_mask);
        mtx_init(&icc->cmp_lock, "cxgbei_cmp", NULL, MTX_DEF);

        ic = &icc->ic;
        ic->ic_lock = lock;

#ifdef DIAGNOSTIC
        refcount_init(&ic->ic_outstanding_pdus, 0);
#endif
        ic->ic_name = name;
        ic->ic_offload = "cxgbei";
        ic->ic_unmapped = true;

        CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc);

        return (ic);
}

void
icl_cxgbei_conn_free(struct icl_conn *ic)
{
        struct icl_cxgbei_conn *icc = ic_to_icc(ic);

        MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);

        CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc);

        mtx_destroy(&icc->cmp_lock);
        hashdestroy(icc->cmp_table, M_CXGBEI, icc->cmp_hash_mask);
        kobj_delete((struct kobj *)icc, M_CXGBE);
        refcount_release(&icl_cxgbei_ncons);
}

static int
icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so, int sspace,
    int rspace)
{
        struct sockopt opt;
        int error, one = 1, ss, rs;

        ss = max(sendspace, sspace);
        rs = max(recvspace, rspace);

        error = soreserve(so, ss, rs);
        if (error != 0)
                return (error);
        SOCKBUF_LOCK(&so->so_snd);
        so->so_snd.sb_flags |= SB_AUTOSIZE;
        SOCKBUF_UNLOCK(&so->so_snd);
        SOCKBUF_LOCK(&so->so_rcv);
        so->so_rcv.sb_flags |= SB_AUTOSIZE;
        SOCKBUF_UNLOCK(&so->so_rcv);

        /*
         * Disable Nagle.
         */
        bzero(&opt, sizeof(opt));
        opt.sopt_dir = SOPT_SET;
        opt.sopt_level = IPPROTO_TCP;
        opt.sopt_name = TCP_NODELAY;
        opt.sopt_val = &one;
        opt.sopt_valsize = sizeof(one);
        error = sosetopt(so, &opt);
        if (error != 0)
                return (error);

        return (0);
}
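
/*
 * The sosetopt() call above is the in-kernel equivalent of what a userland
 * process would do with, e.g.:
 *
 *   int one = 1;
 *   setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
 *
 * soreserve() similarly mirrors SO_SNDBUF/SO_RCVBUF, sized here to hold at
 * least one full PDU in each direction.
 */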

/*
 * Request/response structure used to find out the adapter offloading a socket.
 */
struct find_ofld_adapter_rr {
        struct socket *so;
        struct adapter *sc;     /* result */
};

static void
find_offload_adapter(struct adapter *sc, void *arg)
{
        struct find_ofld_adapter_rr *fa = arg;
        struct socket *so = fa->so;
        struct tom_data *td = sc->tom_softc;
        struct tcpcb *tp;
        struct inpcb *inp;
        /* Non-TCP sockets were filtered out earlier. */
        MPASS(so->so_proto->pr_protocol == IPPROTO_TCP);

        if (fa->sc != NULL)
                return; /* Found already. */

        if (td == NULL)
                return; /* TOE not enabled on this adapter. */

        inp = sotoinpcb(so);
        INP_WLOCK(inp);
        if ((inp->inp_flags & INP_DROPPED) == 0) {
                tp = intotcpcb(inp);
                if (tp->t_flags & TF_TOE && tp->tod == &td->tod)
                        fa->sc = sc;    /* Found. */
        }
        INP_WUNLOCK(inp);
}

static bool
is_memfree(struct adapter *sc)
{
        uint32_t em;

        em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
        if ((em & F_EXT_MEM_ENABLE) != 0)
                return (false);
        if (is_t5(sc) && (em & F_EXT_MEM1_ENABLE) != 0)
                return (false);
        return (true);
}
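
/*
 * is_memfree() reports adapters with no external memory enabled.  Such
 * "memfree" SKUs are excluded from ISO in icl_cxgbei_conn_handoff() below,
 * which only enables ISO when tt.iso is set, the chip is T5 or later, and
 * !is_memfree().
 */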

/* XXXNP: move this to t4_tom. */
static void
send_iscsi_flowc_wr(struct adapter *sc, struct toepcb *toep, int maxlen)
{
        struct wrqe *wr;
        struct fw_flowc_wr *flowc;
        const u_int nparams = 1;
        u_int flowclen;
        struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

        flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);

        wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq);
        if (wr == NULL) {
                /* XXX */
                panic("%s: allocation failure.", __func__);
        }
        flowc = wrtod(wr);
        memset(flowc, 0, wr->wr_len);

        flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
            V_FW_FLOWC_WR_NPARAMS(nparams));
        flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
            V_FW_WR_FLOWID(toep->tid));

        flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX;
        flowc->mnemval[0].val = htobe32(maxlen);

        txsd->tx_credits = howmany(flowclen, 16);
        txsd->plen = 0;
        KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
            ("%s: not enough credits (%d)", __func__, toep->tx_credits));
        toep->tx_credits -= txsd->tx_credits;
        if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
                toep->txsd_pidx = 0;
        toep->txsd_avail--;

        t4_wrq_tx(sc, wr);
}

static void
set_ulp_mode_iscsi(struct adapter *sc, struct toepcb *toep, u_int ulp_submode)
{
        uint64_t val;

        CTR3(KTR_CXGBE, "%s: tid %u, ULP_MODE_ISCSI, submode=%#x",
            __func__, toep->tid, ulp_submode);

        val = V_TCB_ULP_TYPE(ULP_MODE_ISCSI) | V_TCB_ULP_RAW(ulp_submode);
        t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_ULP_TYPE,
            V_TCB_ULP_TYPE(M_TCB_ULP_TYPE) | V_TCB_ULP_RAW(M_TCB_ULP_RAW), val,
            0, 0);

        val = V_TF_RX_FLOW_CONTROL_DISABLE(1ULL);
        t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, val, val, 0, 0);
}

/*
 * XXXNP: Who is responsible for cleaning up the socket if this returns with an
 * error?  Review all error paths.
 *
 * XXXNP: What happens to the socket's fd reference if the operation is
 * successful, and how does that affect the socket's life cycle?
 */
int
icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd)
{
        struct icl_cxgbei_conn *icc = ic_to_icc(ic);
        struct find_ofld_adapter_rr fa;
        struct file *fp;
        struct socket *so;
        struct inpcb *inp;
        struct tcpcb *tp;
        struct toepcb *toep;
        cap_rights_t rights;
        u_int max_iso_payload, max_rx_pdu_len, max_tx_pdu_len;
        int error, max_iso_pdus;

        MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
        ICL_CONN_LOCK_ASSERT_NOT(ic);

        /*
         * Steal the socket from userland.
         */
        error = fget(curthread, fd,
            cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
        if (error != 0)
                return (error);
        if (fp->f_type != DTYPE_SOCKET) {
                fdrop(fp, curthread);
                return (EINVAL);
        }
        so = fp->f_data;
        if (so->so_type != SOCK_STREAM ||
            so->so_proto->pr_protocol != IPPROTO_TCP) {
                fdrop(fp, curthread);
                return (EINVAL);
        }

        ICL_CONN_LOCK(ic);
        if (ic->ic_socket != NULL) {
                ICL_CONN_UNLOCK(ic);
                fdrop(fp, curthread);
                return (EBUSY);
        }
        ic->ic_disconnecting = false;
        ic->ic_socket = so;
        fp->f_ops = &badfileops;
        fp->f_data = NULL;
        fdrop(fp, curthread);
        ICL_CONN_UNLOCK(ic);

        /* Find the adapter offloading this socket. */
        fa.sc = NULL;
        fa.so = so;
        t4_iterate(find_offload_adapter, &fa);
        if (fa.sc == NULL) {
                error = EINVAL;
                goto out;
        }
        icc->sc = fa.sc;

        max_rx_pdu_len = ISCSI_BHS_SIZE + ic->ic_max_recv_data_segment_length;
        max_tx_pdu_len = ISCSI_BHS_SIZE + ic->ic_max_send_data_segment_length;
        if (ic->ic_header_crc32c) {
                max_rx_pdu_len += ISCSI_HEADER_DIGEST_SIZE;
                max_tx_pdu_len += ISCSI_HEADER_DIGEST_SIZE;
        }
        if (ic->ic_data_crc32c) {
                max_rx_pdu_len += ISCSI_DATA_DIGEST_SIZE;
                max_tx_pdu_len += ISCSI_DATA_DIGEST_SIZE;
        }

        inp = sotoinpcb(so);
        INP_WLOCK(inp);
        tp = intotcpcb(inp);
        if (inp->inp_flags & INP_DROPPED) {
                INP_WUNLOCK(inp);
                error = ENOTCONN;
                goto out;
        }

        /*
         * The socket could not have been "unoffloaded" if we got here.
         */
        MPASS(tp->t_flags & TF_TOE);
        MPASS(tp->tod != NULL);
        MPASS(tp->t_toe != NULL);
        toep = tp->t_toe;
        MPASS(toep->vi->adapter == icc->sc);

        if (ulp_mode(toep) != ULP_MODE_NONE) {
                INP_WUNLOCK(inp);
                error = EINVAL;
                goto out;
        }

        icc->toep = toep;

        icc->ulp_submode = 0;
        if (ic->ic_header_crc32c)
                icc->ulp_submode |= ULP_CRC_HEADER;
        if (ic->ic_data_crc32c)
                icc->ulp_submode |= ULP_CRC_DATA;

        if (icc->sc->tt.iso && chip_id(icc->sc) >= CHELSIO_T5 &&
            !is_memfree(icc->sc)) {
                max_iso_payload = rounddown(CXGBEI_MAX_ISO_PAYLOAD,
                    tp->t_maxseg);
                max_iso_pdus = max_iso_payload / max_tx_pdu_len;
                ic->ic_hw_isomax = max_iso_pdus *
                    ic->ic_max_send_data_segment_length;
        } else
                max_iso_pdus = 1;
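
        /*
         * Sketch of the ISO sizing with assumed numbers (48-byte BHS, no
         * digests): if CXGBEI_MAX_ISO_PAYLOAD were 256KB, t_maxseg 1448 and
         * max_tx_pdu_len 8192 + 48 = 8240, then max_iso_payload =
         * rounddown(262144, 1448) = 262088, max_iso_pdus = 262088 / 8240 =
         * 31, and ic_hw_isomax = 31 * 8192, i.e. how much data the ICL
         * layer may attach to a single ISO PDU.
         */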

        toep->params.ulp_mode = ULP_MODE_ISCSI;
        toep->ulpcb = icc;

        send_iscsi_flowc_wr(icc->sc, toep,
            roundup(max_iso_pdus * max_tx_pdu_len, tp->t_maxseg));
        set_ulp_mode_iscsi(icc->sc, toep, icc->ulp_submode);
        INP_WUNLOCK(inp);

        error = kthread_add(icl_cxgbei_tx_main, icc, NULL, &icc->tx_thread, 0,
            0, "%stx (cxgbei)", ic->ic_name);
        if (error != 0)
                goto out;

        error = kthread_add(icl_cxgbei_rx_main, icc, NULL, &icc->rx_thread, 0,
            0, "%srx (cxgbei)", ic->ic_name);
        if (error != 0)
                goto out;

        error = icl_cxgbei_setsockopt(ic, so, max_tx_pdu_len, max_rx_pdu_len);
out:
        if (error != 0)
                icl_cxgbei_conn_close(ic);
        return (error);
}

void
icl_cxgbei_conn_close(struct icl_conn *ic)
{
        struct icl_cxgbei_conn *icc = ic_to_icc(ic);
        struct icl_pdu *ip;
        struct socket *so;
        struct sockbuf *sb;
        struct inpcb *inp;
        struct toepcb *toep = icc->toep;

        MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
        ICL_CONN_LOCK_ASSERT_NOT(ic);

        ICL_CONN_LOCK(ic);
        so = ic->ic_socket;
        if (ic->ic_disconnecting || so == NULL) {
                CTR4(KTR_CXGBE, "%s: icc %p (disconnecting = %d), so %p",
                    __func__, icc, ic->ic_disconnecting, so);
                ICL_CONN_UNLOCK(ic);
                return;
        }
        ic->ic_disconnecting = true;

#ifdef DIAGNOSTIC
        KASSERT(ic->ic_outstanding_pdus == 0,
            ("destroying session with %d outstanding PDUs",
             ic->ic_outstanding_pdus));
#endif

        CTR3(KTR_CXGBE, "%s: tid %d, icc %p", __func__, toep ? toep->tid : -1,
            icc);

        /*
         * Wait for the transmit thread to stop processing
         * this connection.
         */
        if (icc->tx_thread != NULL) {
                wakeup(&icc->tx_active);
                mtx_sleep(icc->tx_thread, ic->ic_lock, 0, "conclo", 0);
        }

        /* Discard PDUs queued for TX. */
        while (!STAILQ_EMPTY(&icc->sent_pdus)) {
                ip = STAILQ_FIRST(&icc->sent_pdus);
                STAILQ_REMOVE_HEAD(&icc->sent_pdus, ip_next);
                icl_cxgbei_pdu_done(ip, ENOTCONN);
        }
        ICL_CONN_UNLOCK(ic);

        inp = sotoinpcb(so);
        sb = &so->so_rcv;

        /*
         * Wait for the receive thread to stop processing this
         * connection.
         */
        SOCKBUF_LOCK(sb);
        if (icc->rx_thread != NULL) {
                icc->rx_exiting = true;
                wakeup(&icc->rx_active);
                mtx_sleep(icc->rx_thread, SOCKBUF_MTX(sb), 0, "conclo", 0);
        }

        /*
         * Discard received PDUs not passed to the iSCSI layer.
         */
        while (!STAILQ_EMPTY(&icc->rcvd_pdus)) {
                ip = STAILQ_FIRST(&icc->rcvd_pdus);
                STAILQ_REMOVE_HEAD(&icc->rcvd_pdus, ip_next);
                icl_cxgbei_pdu_done(ip, ENOTCONN);
        }
        SOCKBUF_UNLOCK(sb);

        INP_WLOCK(inp);
        if (toep != NULL) {     /* NULL if connection was never offloaded. */
                toep->ulpcb = NULL;

                /* Discard mbufs queued for TX. */
                mbufq_drain(&toep->ulp_pduq);

                /*
                 * Grab a reference to use when waiting for the final
                 * CPL to be received.  If toep->inp is NULL, then
                 * final_cpl_received() has already been called (e.g.
                 * due to the peer sending a RST).
                 */
                if (toep->inp != NULL) {
                        toep = hold_toepcb(toep);
                        toep->flags |= TPF_WAITING_FOR_FINAL;
                } else
                        toep = NULL;
        }
        INP_WUNLOCK(inp);

        ICL_CONN_LOCK(ic);
        ic->ic_socket = NULL;
        ICL_CONN_UNLOCK(ic);

        /*
         * XXXNP: we should send RST instead of FIN when PDUs held in various
         * queues were purged instead of delivered reliably but soabort isn't
         * really general purpose and wouldn't do the right thing here.
         */
        soclose(so);

        /*
         * Wait for the socket to fully close.  This ensures any
         * pending received data has been received (and in particular,
         * any data that would be received by DDP has been handled).
         * Callers assume that it is safe to free buffers for tasks
         * and transfers after this function returns.
         */
        if (toep != NULL) {
                struct mtx *lock = mtx_pool_find(mtxpool_sleep, toep);

                mtx_lock(lock);
                while ((toep->flags & TPF_WAITING_FOR_FINAL) != 0)
                        mtx_sleep(toep, lock, PSOCK, "conclo2", 0);
                mtx_unlock(lock);
                free_toepcb(toep);
        }
}

static void
cxgbei_insert_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp,
    uint32_t tt)
{
#ifdef INVARIANTS
        struct cxgbei_cmp *cmp2;
#endif

        cmp->tt = tt;

        mtx_lock(&icc->cmp_lock);
#ifdef INVARIANTS
        LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, tt)], link) {
                KASSERT(cmp2->tt != tt, ("%s: duplicate cmp", __func__));
        }
#endif
        LIST_INSERT_HEAD(&icc->cmp_table[TT_HASH(icc, tt)], cmp, link);
        mtx_unlock(&icc->cmp_lock);
}

struct cxgbei_cmp *
cxgbei_find_cmp(struct icl_cxgbei_conn *icc, uint32_t tt)
{
        struct cxgbei_cmp *cmp;

        mtx_lock(&icc->cmp_lock);
        LIST_FOREACH(cmp, &icc->cmp_table[TT_HASH(icc, tt)], link) {
                if (cmp->tt == tt)
                        break;
        }
        mtx_unlock(&icc->cmp_lock);
        return (cmp);
}

static void
cxgbei_rm_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp)
{
#ifdef INVARIANTS
        struct cxgbei_cmp *cmp2;
#endif

        mtx_lock(&icc->cmp_lock);

#ifdef INVARIANTS
        LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, cmp->tt)], link) {
                if (cmp2 == cmp)
                        goto found;
        }
        panic("%s: could not find cmp", __func__);
found:
#endif
        LIST_REMOVE(cmp, link);
        mtx_unlock(&icc->cmp_lock);
}

int
icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *ittp, void **arg)
{
        struct icl_cxgbei_conn *icc = ic_to_icc(ic);
        struct toepcb *toep = icc->toep;
        struct adapter *sc = icc->sc;
        struct cxgbei_data *ci = sc->iscsi_ulp_softc;
        struct ppod_region *pr = &ci->pr;
        struct cxgbei_ddp_state *ddp;
        struct ppod_reservation *prsv;
        struct inpcb *inp;
        struct mbufq mq;
        uint32_t itt;
        int rc = 0;

        ICL_CONN_LOCK_ASSERT(ic);

        /* This is for the offload driver's state.  Must not be set already. */
        MPASS(arg != NULL);
        MPASS(*arg == NULL);

        if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_IN ||
            csio->dxfer_len < ci->ddp_threshold || ic->ic_disconnecting ||
            ic->ic_socket == NULL) {
no_ddp:
                /*
                 * No DDP for this I/O.  Allocate an ITT (based on the one
                 * passed in) that cannot be a valid hardware DDP tag in the
                 * iSCSI region.
                 */
                itt = *ittp & M_PPOD_TAG;
                itt = V_PPOD_TAG(itt) | pr->pr_invalid_bit;
                *ittp = htobe32(itt);
                MPASS(*arg == NULL);    /* State is maintained for DDP only. */
                if (rc != 0)
                        counter_u64_add(
                            toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1);
                return (0);
        }
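
        /*
         * In the DDP case below the ITT placed in the command PDU is the
         * page pod tag itself; pr_invalid_bit (used above) is what lets the
         * RX path tell real DDP tags from software-only ITTs, and the cmp
         * entry inserted here is what cxgbei_find_cmp() looks up when data
         * PDUs arrive for this tag.
         */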
 1448 
 1449         /*
 1450          * Reserve resources for DDP, update the itt that should be used in the
 1451          * PDU, and save DDP specific state for this I/O in *arg.
 1452          */
 1453         ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
 1454         if (ddp == NULL) {
 1455                 rc = ENOMEM;
 1456                 goto no_ddp;
 1457         }
 1458         prsv = &ddp->prsv;
 1459 
 1460         mbufq_init(&mq, INT_MAX);
 1461         switch (csio->ccb_h.flags & CAM_DATA_MASK) {
 1462         case CAM_DATA_BIO:
 1463                 rc = t4_alloc_page_pods_for_bio(pr,
 1464                     (struct bio *)csio->data_ptr, prsv);
 1465                 if (rc != 0) {
 1466                         free(ddp, M_CXGBEI);
 1467                         goto no_ddp;
 1468                 }
 1469 
 1470                 rc = t4_write_page_pods_for_bio(sc, toep, prsv,
 1471                     (struct bio *)csio->data_ptr, &mq);
 1472                 if (__predict_false(rc != 0)) {
 1473                         mbufq_drain(&mq);
 1474                         t4_free_page_pods(prsv);
 1475                         free(ddp, M_CXGBEI);
 1476                         goto no_ddp;
 1477                 }
 1478                 break;
 1479         case CAM_DATA_VADDR:
 1480                 rc = t4_alloc_page_pods_for_buf(pr, (vm_offset_t)csio->data_ptr,
 1481                     csio->dxfer_len, prsv);
 1482                 if (rc != 0) {
 1483                         free(ddp, M_CXGBEI);
 1484                         goto no_ddp;
 1485                 }
 1486 
 1487                 rc = t4_write_page_pods_for_buf(sc, toep, prsv,
 1488                     (vm_offset_t)csio->data_ptr, csio->dxfer_len, &mq);
 1489                 if (__predict_false(rc != 0)) {
 1490                         mbufq_drain(&mq);
 1491                         t4_free_page_pods(prsv);
 1492                         free(ddp, M_CXGBEI);
 1493                         goto no_ddp;
 1494                 }
 1495                 break;
 1496         default:
 1497                 free(ddp, M_CXGBEI);
 1498                 rc = EINVAL;
 1499                 goto no_ddp;
 1500         }
 1501 
 1502         /*
 1503          * Do not get inp from toep->inp as the toepcb might have
 1504          * detached already.
 1505          */
 1506         inp = sotoinpcb(ic->ic_socket);
 1507         INP_WLOCK(inp);
 1508         if ((inp->inp_flags & INP_DROPPED) != 0) {
 1509                 INP_WUNLOCK(inp);
 1510                 mbufq_drain(&mq);
 1511                 t4_free_page_pods(prsv);
 1512                 free(ddp, M_CXGBEI);
 1513                 goto no_ddp;
 1514         }
 1515         mbufq_concat(&toep->ulp_pduq, &mq);
 1516         INP_WUNLOCK(inp);
 1517 
 1518         ddp->cmp.last_datasn = -1;
 1519         cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
 1520         *ittp = htobe32(prsv->prsv_tag);
 1521         *arg = prsv;
 1522         counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
 1523         return (0);
 1524 }
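
/*
 * Editorial sketch (not part of the driver): how the no_ddp path above
 * derives a software-only ITT.  The tag bits of the ITT passed in are
 * kept, but the region's invalid bit is set, so the result can never
 * equal a tag handed out by the page pod allocator; the tag is then
 * byte-swapped to wire order with htobe32() as above.  The 24-bit layout
 * and bit position below are hypothetical, not the driver's real
 * ppod_region geometry.
 */

#include <stdint.h>

#define EX_PPOD_TAG_MASK	0x00ffffffU	/* hypothetical tag field */
#define EX_PPOD_INVALID_BIT	0x00800000U	/* never set in a hw DDP tag */

static uint32_t
ex_sw_only_tag(uint32_t itt_in)
{

	return ((itt_in & EX_PPOD_TAG_MASK) | EX_PPOD_INVALID_BIT);
}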
 1525 
 1526 void
 1527 icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg)
 1528 {
 1529 
 1530         if (arg != NULL) {
 1531                 struct cxgbei_ddp_state *ddp = arg;
 1532 
 1533                 cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
 1534                 t4_free_page_pods(&ddp->prsv);
 1535                 free(ddp, M_CXGBEI);
 1536         }
 1537 }
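
/*
 * Teardown mirrors setup: the completion structure is unhooked from the
 * tag table first, so incoming-data callbacks can no longer find it, and
 * only then are the page pods and the DDP state freed.
 */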
 1538 
 1539 static inline bool
 1540 ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen)
 1541 {
 1542 #ifdef INVARIANTS
 1543         int total_len = 0;
 1544 #endif
 1545 
 1546         MPASS(entries > 0);
 1547         if (((vm_offset_t)sg[--entries].addr & 3U) != 0)
 1548                 return (false);
 1549 
 1550 #ifdef INVARIANTS
 1551         total_len += sg[entries].len;
 1552 #endif
 1553 
 1554         while (--entries >= 0) {
 1555                 if (((vm_offset_t)sg[entries].addr & PAGE_MASK) != 0 ||
 1556                     (sg[entries].len % PAGE_SIZE) != 0)
 1557                         return (false);
 1558 #ifdef INVARIANTS
 1559                 total_len += sg[entries].len;
 1560 #endif
 1561         }
 1562 
 1563         MPASS(total_len == xferlen);
 1564         return (true);
 1565 }
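
/*
 * Worked example (editorial): with 4 KB pages, the list
 *   { addr 0x10000, len 0x2000 }, { addr 0x20000, len 0x1234 }
 * passes the check above -- every non-final entry is page-aligned and a
 * whole number of pages long, and the final entry starts on a 4-byte
 * boundary -- while
 *   { addr 0x10000, len 0x800 }, { addr 0x20000, len 0x1000 }
 * fails because the non-final entry is only half a page.  A single-entry
 * list needs nothing more than 4-byte alignment of its address.
 */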
 1566 
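/*
 * Per-I/O DDP state is stashed in one of CTL's private per-I/O slots so
 * that it survives across the several transfer_setup/transfer_done
 * rounds a single target I/O can go through.
 */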
 1567 #define io_to_ddp_state(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)
 1568 
 1569 int
 1570 icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, struct icl_pdu *ip,
 1571     union ctl_io *io, uint32_t *tttp, void **arg)
 1572 {
 1573         struct icl_cxgbei_conn *icc = ic_to_icc(ic);
 1574         struct toepcb *toep = icc->toep;
 1575         struct ctl_scsiio *ctsio = &io->scsiio;
 1576         struct adapter *sc = icc->sc;
 1577         struct cxgbei_data *ci = sc->iscsi_ulp_softc;
 1578         struct ppod_region *pr = &ci->pr;
 1579         struct cxgbei_ddp_state *ddp;
 1580         struct ppod_reservation *prsv;
 1581         struct ctl_sg_entry *sgl, sg_entry;
 1582         struct inpcb *inp;
 1583         struct mbufq mq;
 1584         int sg_entries = ctsio->kern_sg_entries;
 1585         uint32_t ttt;
 1586         int xferlen, rc = 0, alias;
 1587 
 1588         /* This is for the offload driver's state.  Must not be set already. */
 1589         MPASS(arg != NULL);
 1590         MPASS(*arg == NULL);
 1591 
 1592         if (ctsio->ext_data_filled == 0) {
 1593                 int first_burst;
 1594 #ifdef INVARIANTS
 1595                 struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
 1596 
 1597                 MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
 1598                 MPASS(ic == ip->ip_conn);
 1599                 MPASS(ip->ip_bhs_mbuf != NULL);
 1600 #endif
 1601                 first_burst = icl_pdu_data_segment_length(ip);
 1602 
 1603                 /*
 1604                  * Note that ICL calls conn_transfer_setup even if the first
 1605                  * burst had everything and there's nothing left to transfer.
 1606                  *
 1607                  * NB: The CTL frontend might have provided a buffer
 1608                  * whose length (kern_data_len) is smaller than the
 1609                  * FirstBurstLength of unsolicited data.  Treat those
 1610                  * as an empty transfer.
 1611                  */
 1612                 xferlen = ctsio->kern_data_len;
 1613                 if (xferlen < first_burst ||
 1614                     xferlen - first_burst < ci->ddp_threshold) {
 1615 no_ddp:
 1616                         /*
 1617                          * No DDP for this transfer.  Allocate a TTT (based on
 1618                          * the one passed in) that cannot be a valid hardware
 1619                          * DDP tag in the iSCSI region.
 1620                          */
 1621                         ttt = *tttp & M_PPOD_TAG;
 1622                         ttt = V_PPOD_TAG(ttt) | pr->pr_invalid_bit;
 1623                         *tttp = htobe32(ttt);
 1624                         MPASS(io_to_ddp_state(io) == NULL);
 1625                         if (rc != 0)
 1626                                 counter_u64_add(
 1627                                     toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1);
 1628                         return (0);
 1629                 }
 1630 
 1631                 if (sg_entries == 0) {
 1632                         sgl = &sg_entry;
 1633                         sgl->len = xferlen;
 1634                         sgl->addr = (void *)ctsio->kern_data_ptr;
 1635                         sg_entries = 1;
 1636                 } else
 1637                         sgl = (void *)ctsio->kern_data_ptr;
 1638 
 1639                 if (!ddp_sgl_check(sgl, sg_entries, xferlen))
 1640                         goto no_ddp;
 1641 
 1642                 /*
 1643                  * Reserve resources for DDP, update the ttt that should be used
 1644                  * in the PDU, and save DDP specific state for this I/O.
 1645                  */
 1646                 MPASS(io_to_ddp_state(io) == NULL);
 1647                 ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
 1648                 if (ddp == NULL) {
 1649                         rc = ENOMEM;
 1650                         goto no_ddp;
 1651                 }
 1652                 prsv = &ddp->prsv;
 1653 
 1654                 rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv);
 1655                 if (rc != 0) {
 1656                         free(ddp, M_CXGBEI);
 1657                         goto no_ddp;
 1658                 }
 1659 
 1660                 mbufq_init(&mq, INT_MAX);
 1661                 rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries,
 1662                     xferlen, &mq);
 1663                 if (__predict_false(rc != 0)) {
 1664                         mbufq_drain(&mq);
 1665                         t4_free_page_pods(prsv);
 1666                         free(ddp, M_CXGBEI);
 1667                         goto no_ddp;
 1668                 }
 1669 
 1670                 /*
 1671                  * Do not get inp from toep->inp as the toepcb might
 1672                  * have detached already.
 1673                  */
 1674                 ICL_CONN_LOCK(ic);
 1675                 if (ic->ic_disconnecting || ic->ic_socket == NULL) {
 1676                         ICL_CONN_UNLOCK(ic);
 1677                         mbufq_drain(&mq);
 1678                         t4_free_page_pods(prsv);
 1679                         free(ddp, M_CXGBEI);
 1680                         return (ECONNRESET);
 1681                 }
 1682                 inp = sotoinpcb(ic->ic_socket);
 1683                 INP_WLOCK(inp);
 1684                 ICL_CONN_UNLOCK(ic);
 1685                 if ((inp->inp_flags & INP_DROPPED) != 0) {
 1686                         INP_WUNLOCK(inp);
 1687                         mbufq_drain(&mq);
 1688                         t4_free_page_pods(prsv);
 1689                         free(ddp, M_CXGBEI);
 1690                         return (ECONNRESET);
 1691                 }
 1692                 mbufq_concat(&toep->ulp_pduq, &mq);
 1693                 INP_WUNLOCK(inp);
 1694 
 1695                 ddp->cmp.next_buffer_offset = ctsio->kern_rel_offset +
 1696                     first_burst;
 1697                 ddp->cmp.last_datasn = -1;
 1698                 cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
 1699                 *tttp = htobe32(prsv->prsv_tag);
 1700                 io_to_ddp_state(io) = ddp;
 1701                 *arg = ctsio;
 1702                 counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
 1703                 return (0);
 1704         }
 1705 
 1706         /*
 1707          * In the middle of an I/O.  A non-NULL page pod reservation indicates
 1708          * that a DDP buffer is being used for the I/O.
 1709          */
 1710         ddp = io_to_ddp_state(ctsio);
 1711         if (ddp == NULL)
 1712                 goto no_ddp;
 1713         prsv = &ddp->prsv;
 1714 
 1715         alias = (prsv->prsv_tag & pr->pr_alias_mask) >> pr->pr_alias_shift;
 1716         alias++;
 1717         prsv->prsv_tag &= ~pr->pr_alias_mask;
 1718         prsv->prsv_tag |= alias << pr->pr_alias_shift & pr->pr_alias_mask;
 1719 
 1720         ddp->cmp.last_datasn = -1;
 1721         cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
 1722         *tttp = htobe32(prsv->prsv_tag);
 1723         *arg = ctsio;
 1724 
 1725         return (0);
 1726 }
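
/*
 * Editorial sketch (not part of the driver): on a restarted transfer the
 * reservation is reused, but the alias field inside the tag is
 * incremented, so the new TTT differs from the one used earlier in the
 * I/O and late PDUs carrying the old tag cannot match a live buffer.
 * The field layout here is hypothetical; the real mask and shift live in
 * the ppod_region.
 */

#include <stdint.h>

#define EX_ALIAS_MASK	0x0f000000U	/* hypothetical 4-bit alias field */
#define EX_ALIAS_SHIFT	24

static uint32_t
ex_bump_alias(uint32_t tag)
{
	uint32_t alias;

	alias = (tag & EX_ALIAS_MASK) >> EX_ALIAS_SHIFT;
	alias++;			/* wraps within the field below */
	tag &= ~EX_ALIAS_MASK;
	tag |= (alias << EX_ALIAS_SHIFT) & EX_ALIAS_MASK;
	return (tag);
}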
 1727 
 1728 void
 1729 icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *arg)
 1730 {
 1731         struct ctl_scsiio *ctsio = arg;
 1732 
 1733         if (ctsio != NULL) {
 1734                 struct cxgbei_ddp_state *ddp;
 1735 
 1736                 ddp = io_to_ddp_state(ctsio);
 1737                 MPASS(ddp != NULL);
 1738 
 1739                 cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
 1740                 if (ctsio->kern_data_len == ctsio->ext_data_filled ||
 1741                     ic->ic_disconnecting) {
 1742                         t4_free_page_pods(&ddp->prsv);
 1743                         free(ddp, M_CXGBEI);
 1744                         io_to_ddp_state(ctsio) = NULL;
 1745                 }
 1746         }
 1747 }
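
/*
 * Unlike task_done above, transfer_done keeps the DDP state when the I/O
 * still has data outstanding on a live connection: the next call to
 * conn_transfer_setup reuses the same reservation via the alias bump
 * rather than programming fresh page pods.
 */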
 1748 
 1749 #ifdef COMPAT_FREEBSD13
 1750 static void
 1751 cxgbei_limits(struct adapter *sc, void *arg)
 1752 {
 1753         struct icl_drv_limits *idl = arg;
 1754         struct cxgbei_data *ci;
 1755         int max_dsl;
 1756 
 1757         if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4lims") != 0)
 1758                 return;
 1759 
 1760         if (uld_active(sc, ULD_ISCSI)) {
 1761                 ci = sc->iscsi_ulp_softc;
 1762                 MPASS(ci != NULL);
 1763 
 1765                 max_dsl = ci->max_rx_data_len;
 1766                 if (idl->idl_max_recv_data_segment_length > max_dsl)
 1767                         idl->idl_max_recv_data_segment_length = max_dsl;
 1768 
 1769                 max_dsl = ci->max_tx_data_len;
 1770                 if (idl->idl_max_send_data_segment_length > max_dsl)
 1771                         idl->idl_max_send_data_segment_length = max_dsl;
 1772         }
 1773 
 1774         end_synchronized_op(sc, LOCK_HELD);
 1775 }
 1776 #endif
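
/*
 * With no socket to identify a specific connection (the COMPAT_FREEBSD13
 * case in icl_cxgbei_limits() below), the limits cannot be tied to one
 * adapter, so t4_iterate() runs cxgbei_limits() over every adapter and
 * clips the advertised values down to the most restrictive ones.
 */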
 1777 
 1778 static int
 1779 cxgbei_limits_fd(struct icl_drv_limits *idl, int fd)
 1780 {
 1781         struct find_ofld_adapter_rr fa;
 1782         struct file *fp;
 1783         struct socket *so;
 1784         struct adapter *sc;
 1785         struct cxgbei_data *ci;
 1786         cap_rights_t rights;
 1787         int error;
 1788 
 1789         error = fget(curthread, fd,
 1790             cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
 1791         if (error != 0)
 1792                 return (error);
 1793         if (fp->f_type != DTYPE_SOCKET) {
 1794                 fdrop(fp, curthread);
 1795                 return (EINVAL);
 1796         }
 1797         so = fp->f_data;
 1798         if (so->so_type != SOCK_STREAM ||
 1799             so->so_proto->pr_protocol != IPPROTO_TCP) {
 1800                 fdrop(fp, curthread);
 1801                 return (EINVAL);
 1802         }
 1803 
 1804         /* Find the adapter offloading this socket. */
 1805         fa.sc = NULL;
 1806         fa.so = so;
 1807         t4_iterate(find_offload_adapter, &fa);
 1808         if (fa.sc == NULL) {
 1809                 fdrop(fp, curthread);
 1810                 return (ENXIO);
 1811         }
 1812         fdrop(fp, curthread);
 1813 
 1814         sc = fa.sc;
 1815         error = begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4lims");
 1816         if (error != 0)
 1817                 return (error);
 1818 
 1819         if (uld_active(sc, ULD_ISCSI)) {
 1820                 ci = sc->iscsi_ulp_softc;
 1821                 MPASS(ci != NULL);
 1822 
 1823                 idl->idl_max_recv_data_segment_length = ci->max_rx_data_len;
 1824                 idl->idl_max_send_data_segment_length = ci->max_tx_data_len;
 1825         } else
 1826                 error = ENXIO;
 1827 
 1828         end_synchronized_op(sc, LOCK_HELD);
 1829 
 1830         return (error);
 1831 }
 1832 
 1833 static int
 1834 icl_cxgbei_limits(struct icl_drv_limits *idl, int socket)
 1835 {
 1836 
 1837         /* Maximum allowed by the RFC.  cxgbei_limits will clip them. */
 1838         idl->idl_max_recv_data_segment_length = (1 << 24) - 1;
 1839         idl->idl_max_send_data_segment_length = (1 << 24) - 1;
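        /*
         * DataSegmentLength is a 24-bit field in the iSCSI BHS, so
         * (1 << 24) - 1 == 16777215 is the largest value the protocol
         * can carry.
         */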
 1840 
 1841         /* These are somewhat arbitrary. */
 1842         idl->idl_max_burst_length = max_burst_length;
 1843         idl->idl_first_burst_length = first_burst_length;
 1844 
 1845 #ifdef COMPAT_FREEBSD13
 1846         if (socket == 0) {
 1847                 t4_iterate(cxgbei_limits, idl);
 1848                 return (0);
 1849         }
 1850 #endif
 1851 
 1852         return (cxgbei_limits_fd(idl, socket));
 1853 }
 1854 
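/*
 * Editorial note: the "false" passed to icl_register()/icl_unregister()
 * below selects the plain iSCSI (non-iSER) flavor of the ICL interface,
 * and the priority value (-100) ranks this backend against others, such
 * as the software ICL module, when more than one could service a
 * connection.
 */
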
 1855 int
 1856 icl_cxgbei_mod_load(void)
 1857 {
 1858         int rc;
 1859 
 1860         refcount_init(&icl_cxgbei_ncons, 0);
 1861 
 1862         rc = icl_register("cxgbei", false, -100, icl_cxgbei_limits,
 1863             icl_cxgbei_new_conn);
 1864 
 1865         return (rc);
 1866 }
 1867 
 1868 int
 1869 icl_cxgbei_mod_unload(void)
 1870 {
 1871 
 1872         if (icl_cxgbei_ncons != 0)
 1873                 return (EBUSY);
 1874 
 1875         icl_unregister("cxgbei", false);
 1876 
 1877         return (0);
 1878 }
 1879 #endif
