The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/toecore.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2012 Chelsio Communications, Inc.
    3  * All rights reserved.
    4  * Written by: Navdeep Parhar <np@FreeBSD.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/11.1/sys/netinet/toecore.c 301217 2016-06-02 17:51:29Z gnn $");
   30 
   31 #include "opt_inet.h"
   32 #include "opt_inet6.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/kernel.h>
   36 #include <sys/systm.h>
   37 #include <sys/malloc.h>
   38 #include <sys/mbuf.h>
   39 #include <sys/module.h>
   40 #include <sys/types.h>
   41 #include <sys/sockopt.h>
   42 #include <sys/sysctl.h>
   43 #include <sys/socket.h>
   44 
   45 #include <net/ethernet.h>
   46 #include <net/if.h>
   47 #include <net/if_var.h>
   48 #include <net/if_types.h>
   49 #include <net/if_vlan_var.h>
   50 #include <net/if_llatbl.h>
   51 #include <net/route.h>
   52 
   53 #include <netinet/if_ether.h>
   54 #include <netinet/in.h>
   55 #include <netinet/in_pcb.h>
   56 #include <netinet/in_var.h>
   57 #include <netinet6/in6_var.h>
   58 #include <netinet6/in6_pcb.h>
   59 #include <netinet6/nd6.h>
   60 #define TCPSTATES
   61 #include <netinet/tcp.h>
   62 #include <netinet/tcp_fsm.h>
   63 #include <netinet/tcp_timer.h>
   64 #include <netinet/tcp_var.h>
   65 #include <netinet/tcp_syncache.h>
   66 #include <netinet/tcp_offload.h>
   67 #include <netinet/toecore.h>
   68 
   69 static struct mtx toedev_lock;
   70 static TAILQ_HEAD(, toedev) toedev_list;
   71 static eventhandler_tag listen_start_eh;
   72 static eventhandler_tag listen_stop_eh;
   73 static eventhandler_tag lle_event_eh;
   74 
   75 static int
   76 toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
   77     struct rtentry *rt __unused, struct sockaddr *nam __unused)
   78 {
   79 
   80         return (ENOTSUP);
   81 }
   82 
   83 static int
   84 toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
   85 {
   86 
   87         return (ENOTSUP);
   88 }
   89 
   90 static int
   91 toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
   92 {
   93 
   94         return (ENOTSUP);
   95 }
   96 
   97 static void
   98 toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
   99     struct mbuf *m)
  100 {
  101 
  102         m_freem(m);
  103         return;
  104 }
  105 
  106 static void
  107 toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
  108 {
  109 
  110         return;
  111 }
  112 
  113 static int
  114 toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
  115 {
  116 
  117         return (ENOTSUP);
  118 }
  119 
  120 static void
  121 toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
  122 {
  123 
  124         return;
  125 }
  126 
  127 static void
  128 toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
  129     struct sockaddr *sa __unused, uint8_t *lladdr __unused,
  130     uint16_t vtag __unused)
  131 {
  132 
  133         return;
  134 }
  135 
  136 static void
  137 toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
  138     struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
  139 {
  140 
  141         return;
  142 }
  143 
  144 static void
  145 toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
  146 {
  147 
  148         return;
  149 }
  150 
  151 static void
  152 toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
  153 {
  154 
  155         return;
  156 }
  157 
  158 static int
  159 toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
  160     struct mbuf *m)
  161 {
  162 
  163         m_freem(m);
  164         return (0);
  165 }
  166 
  167 static void
  168 toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
  169     struct socket *so __unused)
  170 {
  171 
  172         return;
  173 }
  174 
  175 static void
  176 toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
  177     int sopt_dir __unused, int sopt_name __unused)
  178 {
  179 
  180         return;
  181 }
  182 
  183 /*
  184  * Inform one or more TOE devices about a listening socket.
  185  */
  186 static void
  187 toe_listen_start(struct inpcb *inp, void *arg)
  188 {
  189         struct toedev *t, *tod;
  190         struct tcpcb *tp;
  191 
  192         INP_WLOCK_ASSERT(inp);
  193         KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
  194             ("%s: inp is not a TCP inp", __func__));
  195 
  196         if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
  197                 return;
  198 
  199         tp = intotcpcb(inp);
  200         if (tp->t_state != TCPS_LISTEN)
  201                 return;
  202 
  203         t = arg;
  204         mtx_lock(&toedev_lock);
  205         TAILQ_FOREACH(tod, &toedev_list, link) {
  206                 if (t == NULL || t == tod)
  207                         tod->tod_listen_start(tod, tp);
  208         }
  209         mtx_unlock(&toedev_lock);
  210 }
  211 
  212 static void
  213 toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
  214 {
  215         struct inpcb *inp = tp->t_inpcb;
  216 
  217         INP_WLOCK_ASSERT(inp);
  218         KASSERT(tp->t_state == TCPS_LISTEN,
  219             ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
  220 
  221         toe_listen_start(inp, NULL);
  222 }
  223 
  224 static void
  225 toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
  226 {
  227         struct toedev *tod;
  228 #ifdef INVARIANTS
  229         struct inpcb *inp = tp->t_inpcb;
  230 #endif
  231 
  232         INP_WLOCK_ASSERT(inp);
  233         KASSERT(tp->t_state == TCPS_LISTEN,
  234             ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
  235 
  236         mtx_lock(&toedev_lock);
  237         TAILQ_FOREACH(tod, &toedev_list, link)
  238             tod->tod_listen_stop(tod, tp);
  239         mtx_unlock(&toedev_lock);
  240 }
  241 
  242 /*
  243  * Fill up a freshly allocated toedev struct with reasonable defaults.
  244  */
  245 void
  246 init_toedev(struct toedev *tod)
  247 {
  248 
  249         tod->tod_softc = NULL;
  250 
  251         /*
  252          * Provide no-op defaults so that the kernel can call any toedev
  253          * function without having to check whether the TOE driver supplied one
  254          * or not.
  255          */
  256         tod->tod_connect = toedev_connect;
  257         tod->tod_listen_start = toedev_listen_start;
  258         tod->tod_listen_stop = toedev_listen_stop;
  259         tod->tod_input = toedev_input;
  260         tod->tod_rcvd = toedev_rcvd;
  261         tod->tod_output = toedev_output;
  262         tod->tod_send_rst = toedev_output;
  263         tod->tod_send_fin = toedev_output;
  264         tod->tod_pcb_detach = toedev_pcb_detach;
  265         tod->tod_l2_update = toedev_l2_update;
  266         tod->tod_route_redirect = toedev_route_redirect;
  267         tod->tod_syncache_added = toedev_syncache_added;
  268         tod->tod_syncache_removed = toedev_syncache_removed;
  269         tod->tod_syncache_respond = toedev_syncache_respond;
  270         tod->tod_offload_socket = toedev_offload_socket;
  271         tod->tod_ctloutput = toedev_ctloutput;
  272 }
  273 
  274 /*
  275  * Register an active TOE device with the system.  This allows it to receive
  276  * notifications from the kernel.
  277  */
  278 int
  279 register_toedev(struct toedev *tod)
  280 {
  281         struct toedev *t;
  282 
  283         mtx_lock(&toedev_lock);
  284         TAILQ_FOREACH(t, &toedev_list, link) {
  285                 if (t == tod) {
  286                         mtx_unlock(&toedev_lock);
  287                         return (EEXIST);
  288                 }
  289         }
  290 
  291         TAILQ_INSERT_TAIL(&toedev_list, tod, link);
  292         registered_toedevs++;
  293         mtx_unlock(&toedev_lock);
  294 
  295         inp_apply_all(toe_listen_start, tod);
  296 
  297         return (0);
  298 }
  299 
  300 /*
  301  * Remove the TOE device from the global list of active TOE devices.  It is the
  302  * caller's responsibility to ensure that the TOE device is quiesced prior to
  303  * this call.
  304  */
  305 int
  306 unregister_toedev(struct toedev *tod)
  307 {
  308         struct toedev *t, *t2;
  309         int rc = ENODEV;
  310 
  311         mtx_lock(&toedev_lock);
  312         TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
  313                 if (t == tod) {
  314                         TAILQ_REMOVE(&toedev_list, tod, link);
  315                         registered_toedevs--;
  316                         rc = 0;
  317                         break;
  318                 }
  319         }
  320         KASSERT(registered_toedevs >= 0,
  321             ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
  322         mtx_unlock(&toedev_lock);
  323         return (rc);
  324 }
  325 
  326 void
  327 toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
  328     struct inpcb *inp, void *tod, void *todctx)
  329 {
  330         struct socket *lso = inp->inp_socket;
  331 
  332         INP_WLOCK_ASSERT(inp);
  333 
  334         syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
  335 }
  336 
  337 int
  338 toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
  339     struct tcphdr *th, struct socket **lsop)
  340 {
  341 
  342         INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
  343 
  344         return (syncache_expand(inc, to, th, lsop, NULL));
  345 }
  346 
  347 /*
  348  * General purpose check to see if a 4-tuple is in use by the kernel.  If a TCP
  349  * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
  350  * in TIME_WAIT may be assassinated freeing it up for re-use.
  351  *
  352  * Note that the TCP header must have been run through tcp_fields_to_host() or
  353  * equivalent.
  354  */
  355 int
  356 toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
  357 {
  358         struct inpcb *inp;
  359 
  360         if (inc->inc_flags & INC_ISIPV6) {
  361                 inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
  362                     inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
  363                     INPLOOKUP_WLOCKPCB, ifp);
  364         } else {
  365                 inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
  366                     inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
  367         }
  368         if (inp != NULL) {
  369                 INP_WLOCK_ASSERT(inp);
  370 
  371                 if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
  372 
  373                         INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
  374                         if (!tcp_twcheck(inp, NULL, th, NULL, 0))
  375                                 return (EADDRINUSE);
  376                 } else {
  377                         INP_WUNLOCK(inp);
  378                         return (EADDRINUSE);
  379                 }
  380         }
  381 
  382         return (0);
  383 }
  384 
  385 static void
  386 toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
  387 {
  388         struct toedev *tod;
  389         struct ifnet *ifp;
  390         struct sockaddr *sa;
  391         uint8_t *lladdr;
  392         uint16_t vtag;
  393         int family;
  394         struct sockaddr_in6 sin6;
  395 
  396         LLE_WLOCK_ASSERT(lle);
  397 
  398         ifp = lltable_get_ifp(lle->lle_tbl);
  399         family = lltable_get_af(lle->lle_tbl);
  400 
  401         if (family != AF_INET && family != AF_INET6)
  402                 return;
  403         /*
  404          * Not interested if the interface's TOE capability is not enabled.
  405          */
  406         if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
  407             (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
  408                 return;
  409 
  410         tod = TOEDEV(ifp);
  411         if (tod == NULL)
  412                 return;
  413 
  414         sa = (struct sockaddr *)&sin6;
  415         lltable_fill_sa_entry(lle, sa);
  416 
  417         vtag = 0xfff;
  418         if (evt != LLENTRY_RESOLVED) {
  419 
  420                 /*
  421                  * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
  422                  * this entry is going to be deleted.
  423                  */
  424 
  425                 lladdr = NULL;
  426         } else {
  427 
  428                 KASSERT(lle->la_flags & LLE_VALID,
  429                     ("%s: %p resolved but not valid?", __func__, lle));
  430 
  431                 lladdr = (uint8_t *)lle->ll_addr;
  432 #ifdef VLAN_TAG
  433                 VLAN_TAG(ifp, &vtag);
  434 #endif
  435         }
  436 
  437         tod->tod_l2_update(tod, ifp, sa, lladdr, vtag);
  438 }
  439 
  440 /*
  441  * Returns 0 or EWOULDBLOCK on success (any other value is an error).  0 means
  442  * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
  443  * tod_l2_update will be called later, when the entry is resolved or times out.
  444  */
  445 int
  446 toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
  447     uint8_t *lladdr, uint16_t *vtag)
  448 {
  449         int rc;
  450 
  451         switch (sa->sa_family) {
  452 #ifdef INET
  453         case AF_INET:
  454                 rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
  455                 break;
  456 #endif
  457 #ifdef INET6
  458         case AF_INET6:
  459                 rc = nd6_resolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
  460                 break;
  461 #endif
  462         default:
  463                 return (EPROTONOSUPPORT);
  464         }
  465 
  466         if (rc == 0) {
  467 #ifdef VLAN_TAG
  468                 if (VLAN_TAG(ifp, vtag) != 0)
  469 #endif
  470                         *vtag = 0xfff;
  471         }
  472 
  473         return (rc);
  474 }
  475 
  476 void
  477 toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
  478 {
  479 
  480         INP_WLOCK_ASSERT(inp);
  481 
  482         if (!(inp->inp_flags & INP_DROPPED)) {
  483                 struct tcpcb *tp = intotcpcb(inp);
  484 
  485                 KASSERT(tp->t_flags & TF_TOE,
  486                     ("%s: tp %p not offloaded.", __func__, tp));
  487 
  488                 if (err == EAGAIN) {
  489 
  490                         /*
  491                          * Temporary failure during offload, take this PCB back.
  492                          * Detach from the TOE driver and do the rest of what
  493                          * TCP's pru_connect would have done if the connection
  494                          * wasn't offloaded.
  495                          */
  496 
  497                         tod->tod_pcb_detach(tod, tp);
  498                         KASSERT(!(tp->t_flags & TF_TOE),
  499                             ("%s: tp %p still offloaded.", __func__, tp));
  500                         tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
  501                         (void) tp->t_fb->tfb_tcp_output(tp);
  502                 } else {
  503 
  504                         INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
  505                         tp = tcp_drop(tp, err);
  506                         if (tp == NULL)
  507                                 INP_WLOCK(inp); /* re-acquire */
  508                 }
  509         }
  510         INP_WLOCK_ASSERT(inp);
  511 }
  512 
  513 static int
  514 toecore_load(void)
  515 {
  516 
  517         mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
  518         TAILQ_INIT(&toedev_list);
  519 
  520         listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
  521             toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
  522         listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
  523             toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
  524         lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
  525             EVENTHANDLER_PRI_ANY);
  526 
  527         return (0);
  528 }
  529 
  530 static int
  531 toecore_unload(void)
  532 {
  533 
  534         mtx_lock(&toedev_lock);
  535         if (!TAILQ_EMPTY(&toedev_list)) {
  536                 mtx_unlock(&toedev_lock);
  537                 return (EBUSY);
  538         }
  539 
  540         EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
  541         EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
  542         EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
  543 
  544         mtx_unlock(&toedev_lock);
  545         mtx_destroy(&toedev_lock);
  546 
  547         return (0);
  548 }
  549 
  550 static int
  551 toecore_mod_handler(module_t mod, int cmd, void *arg)
  552 {
  553 
  554         if (cmd == MOD_LOAD)
  555                 return (toecore_load());
  556 
  557         if (cmd == MOD_UNLOAD)
  558                 return (toecore_unload());
  559 
  560         return (EOPNOTSUPP);
  561 }
  562 
  563 static moduledata_t mod_data= {
  564         "toecore",
  565         toecore_mod_handler,
  566         0
  567 };
  568 
  569 MODULE_VERSION(toecore, 1);
  570 DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);

Cache object: 1fab02e9c993c4425222ba42683e157e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.