The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/toecore.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2012 Chelsio Communications, Inc.
    5  * All rights reserved.
    6  * Written by: Navdeep Parhar <np@FreeBSD.org>
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include "opt_inet.h"
   34 #include "opt_inet6.h"
   35 
   36 #include <sys/param.h>
   37 #include <sys/eventhandler.h>
   38 #include <sys/kernel.h>
   39 #include <sys/systm.h>
   40 #include <sys/malloc.h>
   41 #include <sys/mbuf.h>
   42 #include <sys/module.h>
   43 #include <sys/types.h>
   44 #include <sys/sockopt.h>
   45 #include <sys/sysctl.h>
   46 #include <sys/socket.h>
   47 
   48 #include <net/ethernet.h>
   49 #include <net/if.h>
   50 #include <net/if_var.h>
   51 #include <net/if_types.h>
   52 #include <net/if_vlan_var.h>
   53 #include <net/if_llatbl.h>
   54 #include <net/route.h>
   55 
   56 #include <netinet/if_ether.h>
   57 #include <netinet/in.h>
   58 #include <netinet/in_pcb.h>
   59 #include <netinet/in_var.h>
   60 #include <netinet6/in6_var.h>
   61 #include <netinet6/in6_pcb.h>
   62 #include <netinet6/nd6.h>
   63 #define TCPSTATES
   64 #include <netinet/tcp.h>
   65 #include <netinet/tcp_fsm.h>
   66 #include <netinet/tcp_timer.h>
   67 #include <netinet/tcp_var.h>
   68 #include <netinet/tcp_syncache.h>
   69 #include <netinet/tcp_offload.h>
   70 #include <netinet/toecore.h>
   71 
   72 static struct mtx toedev_lock;
   73 static TAILQ_HEAD(, toedev) toedev_list;
   74 static eventhandler_tag listen_start_eh;
   75 static eventhandler_tag listen_stop_eh;
   76 static eventhandler_tag lle_event_eh;
   77 
   78 static int
   79 toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
   80     struct nhop_object *nh __unused, struct sockaddr *nam __unused)
   81 {
   82 
   83         return (ENOTSUP);
   84 }
   85 
   86 static int
   87 toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
   88 {
   89 
   90         return (ENOTSUP);
   91 }
   92 
   93 static int
   94 toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
   95 {
   96 
   97         return (ENOTSUP);
   98 }
   99 
  100 static void
  101 toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
  102     struct mbuf *m)
  103 {
  104 
  105         m_freem(m);
  106         return;
  107 }
  108 
  109 static void
  110 toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
  111 {
  112 
  113         return;
  114 }
  115 
  116 static int
  117 toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
  118 {
  119 
  120         return (ENOTSUP);
  121 }
  122 
  123 static void
  124 toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
  125 {
  126 
  127         return;
  128 }
  129 
  130 static void
  131 toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
  132     struct sockaddr *sa __unused, uint8_t *lladdr __unused,
  133     uint16_t vtag __unused)
  134 {
  135 
  136         return;
  137 }
  138 
  139 static void
  140 toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
  141     struct nhop_object *nh0 __unused, struct nhop_object *nh1 __unused)
  142 {
  143 
  144         return;
  145 }
  146 
  147 static void
  148 toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
  149 {
  150 
  151         return;
  152 }
  153 
  154 static void
  155 toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
  156 {
  157 
  158         return;
  159 }
  160 
  161 static int
  162 toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
  163     struct mbuf *m)
  164 {
  165 
  166         m_freem(m);
  167         return (0);
  168 }
  169 
  170 static void
  171 toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
  172     struct socket *so __unused)
  173 {
  174 
  175         return;
  176 }
  177 
  178 static void
  179 toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
  180     int sopt_dir __unused, int sopt_name __unused)
  181 {
  182 
  183         return;
  184 }
  185 
  186 static void
  187 toedev_tcp_info(struct toedev *tod __unused, struct tcpcb *tp __unused,
  188     struct tcp_info *ti __unused)
  189 {
  190 
  191         return;
  192 }
  193 
  194 static int
  195 toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused,
  196     struct ktls_session *tls __unused, int direction __unused)
  197 {
  198 
  199         return (EINVAL);
  200 }
  201 
  202 static void
  203 toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused,
  204     tcp_seq seq __unused, int mtu __unused)
  205 {
  206 
  207         return;
  208 }
  209 
  210 /*
  211  * Inform one or more TOE devices about a listening socket.
  212  */
  213 static void
  214 toe_listen_start(struct inpcb *inp, void *arg)
  215 {
  216         struct toedev *t, *tod;
  217         struct tcpcb *tp;
  218 
  219         INP_WLOCK_ASSERT(inp);
  220         KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
  221             ("%s: inp is not a TCP inp", __func__));
  222 
  223         if (inp->inp_flags & INP_DROPPED)
  224                 return;
  225 
  226         tp = intotcpcb(inp);
  227         if (tp->t_state != TCPS_LISTEN)
  228                 return;
  229 
  230         t = arg;
  231         mtx_lock(&toedev_lock);
  232         TAILQ_FOREACH(tod, &toedev_list, link) {
  233                 if (t == NULL || t == tod)
  234                         tod->tod_listen_start(tod, tp);
  235         }
  236         mtx_unlock(&toedev_lock);
  237 }
  238 
  239 static void
  240 toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
  241 {
  242         struct inpcb *inp = tptoinpcb(tp);
  243 
  244         INP_WLOCK_ASSERT(inp);
  245         KASSERT(tp->t_state == TCPS_LISTEN,
  246             ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
  247 
  248         toe_listen_start(inp, NULL);
  249 }
  250 
  251 static void
  252 toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
  253 {
  254         struct toedev *tod;
  255 #ifdef INVARIANTS
  256         struct inpcb *inp = tptoinpcb(tp);
  257 #endif
  258 
  259         INP_WLOCK_ASSERT(inp);
  260         KASSERT(tp->t_state == TCPS_LISTEN,
  261             ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
  262 
  263         mtx_lock(&toedev_lock);
  264         TAILQ_FOREACH(tod, &toedev_list, link)
  265             tod->tod_listen_stop(tod, tp);
  266         mtx_unlock(&toedev_lock);
  267 }
  268 
  269 /*
  270  * Fill up a freshly allocated toedev struct with reasonable defaults.
  271  */
  272 void
  273 init_toedev(struct toedev *tod)
  274 {
  275 
  276         tod->tod_softc = NULL;
  277 
  278         /*
  279          * Provide no-op defaults so that the kernel can call any toedev
  280          * function without having to check whether the TOE driver supplied one
  281          * or not.
  282          */
  283         tod->tod_connect = toedev_connect;
  284         tod->tod_listen_start = toedev_listen_start;
  285         tod->tod_listen_stop = toedev_listen_stop;
  286         tod->tod_input = toedev_input;
  287         tod->tod_rcvd = toedev_rcvd;
  288         tod->tod_output = toedev_output;
  289         tod->tod_send_rst = toedev_output;
  290         tod->tod_send_fin = toedev_output;
  291         tod->tod_pcb_detach = toedev_pcb_detach;
  292         tod->tod_l2_update = toedev_l2_update;
  293         tod->tod_route_redirect = toedev_route_redirect;
  294         tod->tod_syncache_added = toedev_syncache_added;
  295         tod->tod_syncache_removed = toedev_syncache_removed;
  296         tod->tod_syncache_respond = toedev_syncache_respond;
  297         tod->tod_offload_socket = toedev_offload_socket;
  298         tod->tod_ctloutput = toedev_ctloutput;
  299         tod->tod_tcp_info = toedev_tcp_info;
  300         tod->tod_alloc_tls_session = toedev_alloc_tls_session;
  301         tod->tod_pmtu_update = toedev_pmtu_update;
  302 }
  303 
  304 /*
  305  * Register an active TOE device with the system.  This allows it to receive
  306  * notifications from the kernel.
  307  */
  308 int
  309 register_toedev(struct toedev *tod)
  310 {
  311         struct toedev *t;
  312 
  313         mtx_lock(&toedev_lock);
  314         TAILQ_FOREACH(t, &toedev_list, link) {
  315                 if (t == tod) {
  316                         mtx_unlock(&toedev_lock);
  317                         return (EEXIST);
  318                 }
  319         }
  320 
  321         TAILQ_INSERT_TAIL(&toedev_list, tod, link);
  322         registered_toedevs++;
  323         mtx_unlock(&toedev_lock);
  324 
  325         inp_apply_all(&V_tcbinfo, toe_listen_start, tod);
  326 
  327         return (0);
  328 }
  329 
  330 /*
  331  * Remove the TOE device from the global list of active TOE devices.  It is the
  332  * caller's responsibility to ensure that the TOE device is quiesced prior to
  333  * this call.
  334  */
  335 int
  336 unregister_toedev(struct toedev *tod)
  337 {
  338         struct toedev *t, *t2;
  339         int rc = ENODEV;
  340 
  341         mtx_lock(&toedev_lock);
  342         TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
  343                 if (t == tod) {
  344                         TAILQ_REMOVE(&toedev_list, tod, link);
  345                         registered_toedevs--;
  346                         rc = 0;
  347                         break;
  348                 }
  349         }
  350         KASSERT(registered_toedevs >= 0,
  351             ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
  352         mtx_unlock(&toedev_lock);
  353         return (rc);
  354 }
  355 
  356 void
  357 toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
  358     struct inpcb *inp, void *tod, void *todctx, uint8_t iptos)
  359 {
  360 
  361         INP_RLOCK_ASSERT(inp);
  362 
  363         (void )syncache_add(inc, to, th, inp, inp->inp_socket, NULL, tod,
  364             todctx, iptos, htons(0));
  365 }
  366 
  367 int
  368 toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
  369     struct tcphdr *th, struct socket **lsop)
  370 {
  371 
  372         NET_EPOCH_ASSERT();
  373 
  374         return (syncache_expand(inc, to, th, lsop, NULL, htons(0)));
  375 }
  376 
  377 /*
  378  * General purpose check to see if a 4-tuple is in use by the kernel.  If a TCP
  379  * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
  380  * in TIME_WAIT may be assassinated freeing it up for re-use.
  381  *
  382  * Note that the TCP header must have been run through tcp_fields_to_host() or
  383  * equivalent.
  384  */
  385 int
  386 toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
  387 {
  388         struct inpcb *inp;
  389         struct tcpcb *tp;
  390 
  391         if (inc->inc_flags & INC_ISIPV6) {
  392                 inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
  393                     inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
  394                     INPLOOKUP_RLOCKPCB, ifp);
  395         } else {
  396                 inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
  397                     inc->inc_laddr, inc->inc_lport, INPLOOKUP_RLOCKPCB, ifp);
  398         }
  399         if (inp != NULL) {
  400                 INP_RLOCK_ASSERT(inp);
  401 
  402                 tp = intotcpcb(inp);
  403                 if (tp->t_state == TCPS_TIME_WAIT && th != NULL) {
  404                         if (!tcp_twcheck(inp, NULL, th, NULL, 0))
  405                                 return (EADDRINUSE);
  406                 } else {
  407                         INP_RUNLOCK(inp);
  408                         return (EADDRINUSE);
  409                 }
  410         }
  411 
  412         return (0);
  413 }
  414 
  415 static void
  416 toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
  417 {
  418         struct toedev *tod;
  419         struct ifnet *ifp;
  420         struct sockaddr *sa;
  421         uint8_t *lladdr;
  422         uint16_t vid, pcp;
  423         int family;
  424         struct sockaddr_in6 sin6;
  425 
  426         LLE_WLOCK_ASSERT(lle);
  427 
  428         ifp = lltable_get_ifp(lle->lle_tbl);
  429         family = lltable_get_af(lle->lle_tbl);
  430 
  431         if (family != AF_INET && family != AF_INET6)
  432                 return;
  433         /*
  434          * Not interested if the interface's TOE capability is not enabled.
  435          */
  436         if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
  437             (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
  438                 return;
  439 
  440         tod = TOEDEV(ifp);
  441         if (tod == NULL)
  442                 return;
  443 
  444         sa = (struct sockaddr *)&sin6;
  445         lltable_fill_sa_entry(lle, sa);
  446 
  447         vid = 0xfff;
  448         pcp = 0;
  449         if (evt != LLENTRY_RESOLVED) {
  450                 /*
  451                  * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
  452                  * this entry is going to be deleted.
  453                  */
  454 
  455                 lladdr = NULL;
  456         } else {
  457                 KASSERT(lle->la_flags & LLE_VALID,
  458                     ("%s: %p resolved but not valid?", __func__, lle));
  459 
  460                 lladdr = (uint8_t *)lle->ll_addr;
  461                 VLAN_TAG(ifp, &vid);
  462                 VLAN_PCP(ifp, &pcp);
  463         }
  464 
  465         tod->tod_l2_update(tod, ifp, sa, lladdr, EVL_MAKETAG(vid, pcp, 0));
  466 }
  467 
  468 /*
  469  * Returns 0 or EWOULDBLOCK on success (any other value is an error).  0 means
  470  * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
  471  * tod_l2_update will be called later, when the entry is resolved or times out.
  472  */
  473 int
  474 toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
  475     uint8_t *lladdr, uint16_t *vtag)
  476 {
  477         int rc;
  478         uint16_t vid, pcp;
  479 
  480         switch (sa->sa_family) {
  481 #ifdef INET
  482         case AF_INET:
  483                 rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
  484                 break;
  485 #endif
  486 #ifdef INET6
  487         case AF_INET6:
  488                 rc = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), NULL, sa, lladdr,
  489                     NULL, NULL);
  490                 break;
  491 #endif
  492         default:
  493                 return (EPROTONOSUPPORT);
  494         }
  495 
  496         if (rc == 0) {
  497                 vid = 0xfff;
  498                 pcp = 0;
  499                 if (ifp->if_type == IFT_L2VLAN) {
  500                         VLAN_TAG(ifp, &vid);
  501                         VLAN_PCP(ifp, &pcp);
  502                 } else if (ifp->if_pcp != IFNET_PCP_NONE) {
  503                         vid = 0;
  504                         pcp = ifp->if_pcp;
  505                 }
  506                 *vtag = EVL_MAKETAG(vid, pcp, 0);
  507         }
  508 
  509         return (rc);
  510 }
  511 
  512 void
  513 toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
  514 {
  515 
  516         NET_EPOCH_ASSERT();
  517         INP_WLOCK_ASSERT(inp);
  518 
  519         if (!(inp->inp_flags & INP_DROPPED)) {
  520                 struct tcpcb *tp = intotcpcb(inp);
  521 
  522                 KASSERT(tp->t_flags & TF_TOE,
  523                     ("%s: tp %p not offloaded.", __func__, tp));
  524 
  525                 if (err == EAGAIN) {
  526                         /*
  527                          * Temporary failure during offload, take this PCB back.
  528                          * Detach from the TOE driver and do the rest of what
  529                          * TCP's pru_connect would have done if the connection
  530                          * wasn't offloaded.
  531                          */
  532 
  533                         tod->tod_pcb_detach(tod, tp);
  534                         KASSERT(!(tp->t_flags & TF_TOE),
  535                             ("%s: tp %p still offloaded.", __func__, tp));
  536                         tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
  537                         if (tcp_output(tp) < 0)
  538                                 INP_WLOCK(inp); /* re-acquire */
  539                 } else {
  540                         tp = tcp_drop(tp, err);
  541                         if (tp == NULL)
  542                                 INP_WLOCK(inp); /* re-acquire */
  543                 }
  544         }
  545         INP_WLOCK_ASSERT(inp);
  546 }
  547 
  548 static int
  549 toecore_load(void)
  550 {
  551 
  552         mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
  553         TAILQ_INIT(&toedev_list);
  554 
  555         listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
  556             toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
  557         listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
  558             toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
  559         lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
  560             EVENTHANDLER_PRI_ANY);
  561 
  562         return (0);
  563 }
  564 
  565 static int
  566 toecore_unload(void)
  567 {
  568 
  569         mtx_lock(&toedev_lock);
  570         if (!TAILQ_EMPTY(&toedev_list)) {
  571                 mtx_unlock(&toedev_lock);
  572                 return (EBUSY);
  573         }
  574 
  575         EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
  576         EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
  577         EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
  578 
  579         mtx_unlock(&toedev_lock);
  580         mtx_destroy(&toedev_lock);
  581 
  582         return (0);
  583 }
  584 
  585 static int
  586 toecore_mod_handler(module_t mod, int cmd, void *arg)
  587 {
  588 
  589         if (cmd == MOD_LOAD)
  590                 return (toecore_load());
  591 
  592         if (cmd == MOD_UNLOAD)
  593                 return (toecore_unload());
  594 
  595         return (EOPNOTSUPP);
  596 }
  597 
  598 static moduledata_t mod_data= {
  599         "toecore",
  600         toecore_mod_handler,
  601         0
  602 };
  603 
  604 MODULE_VERSION(toecore, 1);
  605 DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);

Cache object: 215b18a6801b2626a4a5ba5904342161


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.