The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_offload.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2007-2008, Chelsio Inc.
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions are met:
    7  *
    8  * 1. Redistributions of source code must retain the above copyright notice,
    9  *    this list of conditions and the following disclaimer.
   10  *
   11  * 2. Neither the name of the Chelsio Corporation nor the names of its
   12  *    contributors may be used to endorse or promote products derived from
   13  *    this software without specific prior written permission.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   19  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   25  * POSSIBILITY OF SUCH DAMAGE.
   26  *
   27  * $FreeBSD$
   28  */
   29 
   30 #ifndef _NETINET_TCP_OFFLOAD_H_
   31 #define _NETINET_TCP_OFFLOAD_H_
   32 
   33 #ifndef _KERNEL
   34 #error "no user-serviceable parts inside"
   35 #endif
   36 
   37 /*
   38  * A driver publishes that it provides offload services
   39  * by setting IFCAP_TOE in the ifnet. The offload connect
   40  * will bypass any further work if the interface that a
   41  * connection would use does not support TCP offload.
   42  *
   43  * The TOE API assumes that the tcp offload engine can offload
   44  * the entire connection from set up to teardown, with some provision
   45  * being made for allowing the software stack to handle time wait. If
   46  * the device does not meet these criteria, it is the driver's responsibility
   47  * to overload the functions that it needs to in tcp_usrreqs and make
   48  * its own calls to tcp_output if it needs to do so.
   49  *
   50  * There is currently no provision for the device advertising the congestion
   51  * control algorithms it supports as there is currently no API for querying 
   52  * an operating system for the protocols that it has loaded. This is a desirable
   53  * future extension.
   54  *
   55  *
   56  *
   57  * It is assumed that individuals deploying TOE will want connections
   58  * to be offloaded without software changes so all connections on an
   59  * interface providing TOE are offloaded unless the SO_NO_OFFLOAD 
   60  * flag is set on the socket.
   61  *
   62  *
   63  * The toe_usrreqs structure constitutes the TOE driver's 
   64  * interface to the TCP stack for functionality that doesn't
   65  * interact directly with userspace. If one wants to provide
   66  * (optional) functionality to do zero-copy to/from
   67  * userspace one still needs to override soreceive/sosend 
   68  * with functions that fault in and pin the user buffers.
   69  *
   70  * + tu_send
   71  *   - tells the driver that new data may have been added to the 
   72  *     socket's send buffer - the driver should not fail if the
   73  *     buffer is in fact unchanged
   74  *   - the driver is responsible for providing credits (bytes in the send window)
   75  *     back to the socket by calling sbdrop() as segments are acknowledged.
   76  *   - The driver expects the inpcb lock to be held - the driver is expected
   77  *     not to drop the lock. Hence the driver is not allowed to acquire the
   78  *     pcbinfo lock during this call.
   79  *
   80  * + tu_rcvd
   81  *   - returns credits to the driver and triggers window updates
   82  *     to the peer (a credit as used here is a byte in the peer's receive window)
   83  *   - the driver is expected to determine how many bytes have been 
   84  *     consumed and credit that back to the card so that it can grow
   85  *     the window again by maintaining its own state between invocations.
   86  *   - In principle this could be used to shrink the window as well as
   87  *     grow the window, although it is not used for that now.
   88  *   - this function needs to correctly handle being called any number of
   89  *     times without any bytes being consumed from the receive buffer.
   90  *   - The driver expects the inpcb lock to be held - the driver is expected
   91  *     not to drop the lock. Hence the driver is not allowed to acquire the
   92  *     pcbinfo lock during this call.
   93  *
   94  * + tu_disconnect
   95  *   - tells the driver to send FIN to peer
   96  *   - driver is expected to send the remaining data and then do a clean half close
   97  *   - disconnect implies at least half-close so only send, reset, and detach
   98  *     are legal
   99  *   - the driver is expected to handle transition through the shutdown
  100  *     state machine and allow the stack to support SO_LINGER.
  101  *   - The driver expects the inpcb lock to be held - the driver is expected
  102  *     not to drop the lock. Hence the driver is not allowed to acquire the
  103  *     pcbinfo lock during this call.
  104  *
  105  * + tu_reset
  106  *   - closes the connection and sends a RST to peer
  107  *   - driver is expected to trigger an RST and detach the toepcb
  108  *   - no further calls are legal after reset
  109  *   - The driver expects the inpcb lock to be held - the driver is expected
  110  *     not to drop the lock. Hence the driver is not allowed to acquire the
  111  *     pcbinfo lock during this call.
  112  *
  113  *   The following fields in the tcpcb are expected to be referenced by the driver:
  114  *      + iss
  115  *      + rcv_nxt
  116  *      + rcv_wnd
  117  *      + snd_isn
  118  *      + snd_max
  119  *      + snd_nxt
  120  *      + snd_una
  121  *      + t_flags
  122  *      + t_inpcb
  123  *      + t_maxseg
  124  *      + t_toe
  125  *
  126  *   The following fields in the inpcb are expected to be referenced by the driver:
  127  *      + inp_lport
  128  *      + inp_fport
  129  *      + inp_laddr
  130  *      + inp_faddr
  131  *      + inp_socket
  132  *      + inp_ip_tos
  133  *
  134  *   The following fields in the socket are expected to be referenced by the
  135  *   driver:
  136  *      + so_comp
  137  *      + so_error
  138  *      + so_linger
  139  *      + so_options
  140  *      + so_rcv
  141  *      + so_snd
  142  *      + so_state
  143  *      + so_timeo
  144  *
  145  *   These functions all return 0 on success and can return the following errors
  146  *   as appropriate:
  147  *      + EPERM:
  148  *      + ENOBUFS: memory allocation failed
  149  *      + EMSGSIZE: MTU changed during the call
  150  *      + EHOSTDOWN:
  151  *      + EHOSTUNREACH:
  152  *      + ENETDOWN:
  153  *      + ENETUNREACH: the peer is no longer reachable
  154  *
  155  * + tu_detach
  156  *   - tells driver that the socket is going away so disconnect
  157  *     the toepcb and free appropriate resources
  158  *   - allows the driver to cleanly handle the case of connection state
  159  *     outliving the socket
  160  *   - no further calls are legal after detach
  161  *   - the driver is expected to provide its own synchronization between
  162  *     detach and receiving new data.
  163  * 
  164  * + tu_syncache_event
  165  *   - even if it is not actually needed, the driver is expected to
  166  *     call syncache_add for the initial SYN and then syncache_expand
  167  *     for the SYN,ACK
  168  *   - tells driver that a connection either has not been added or has 
  169  *     been dropped from the syncache
  170  *   - the driver is expected to maintain state that lives outside the 
  171  *     software stack so the syncache needs to be able to notify the
  172  *     toe driver that the software stack is not going to create a connection
  173  *     for a received SYN
  174  *   - The driver is responsible for any synchronization required between
  175  *     the syncache dropping an entry and the driver processing the SYN,ACK.
  176  * 
  177  */
  178 struct toe_usrreqs {	/* TOE driver entry points called by the TCP stack */
  179         int (*tu_send)(struct tcpcb *tp);	/* send buffer may hold new data */
  180         int (*tu_rcvd)(struct tcpcb *tp);	/* return receive-window credits to the driver */
  181         int (*tu_disconnect)(struct tcpcb *tp);	/* send remaining data, then FIN (half close) */
  182         int (*tu_reset)(struct tcpcb *tp);	/* send RST to peer and detach the toepcb */
  183         void (*tu_detach)(struct tcpcb *tp);	/* socket is going away; release driver state */
  184         void (*tu_syncache_event)(int event, void *toep);	/* syncache entry not added or dropped */
  185 };
  186 
  187 #define TOE_SC_ENTRY_PRESENT            1       /* entry not added: 4-tuple already present */
  188 #define TOE_SC_DROP                     2       /* entry dropped: connection timed out */
  189 
  190 /*
  191  * Because listen is a one-to-many relationship (a socket can be listening 
  192  * on all interfaces on a machine some of which may be using different TCP
  193  * offload devices), listen uses a publish/subscribe mechanism. The TCP
  194  * offload driver registers a listen notification function with the stack.
  195  * When a listen socket is created all TCP offload devices are notified
  196  * so that they can do the appropriate set up to offload connections on the
  197  * port to which the socket is bound. When the listen socket is closed,
  198  * the offload devices are notified so that they will stop listening on that
  199  * port and free any associated resources as well as sending RSTs on any
  200  * connections in the SYN_RCVD state.
  201  *
  202  */
  203 
  204 typedef void    (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);	/* a listen socket was created */
  205 typedef void    (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);	/* a listen socket was closed */
  206 /* Events invoked by tcp_offload_listen_open() and tcp_offload_listen_close() in this header. */
  207 EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
  208 EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
  209 
  210 /*
  211  * Check if the socket can be offloaded by the following steps:
  212  * - determine the egress interface
  213  * - check that the interface has TOE capability and that TOE is enabled
  214  * - check if the device has resources to offload the connection
  215  */
  216 int     tcp_offload_connect(struct socket *so, struct sockaddr *nam);	/* nonzero return makes tcp_output_connect() fall back to tcp_output() */
  217 
  218 /*
  219  * The tcp_output_* routines are wrappers around the toe_usrreqs calls
  220  * which trigger packet transmission. In the non-offloaded case they
  221  * translate to tcp_output. The tcp_offload_* routines notify TOE
  222  * of specific events. In the non-offloaded case they are no-ops.
  223  *
  224  * Listen is a special case because it is a 1 to many relationship
  225  * and there can be more than one offload driver in the system.
  226  */
  227 
  228 /*
  229  * Connection is offloaded
  230  */
  231 #define tp_offload(tp)          ((tp)->t_flags & TF_TOE)	/* nonzero iff TF_TOE is set; #undef'd at the end of this header */
  232 
  233 /*
  234  * hackish way of allowing this file to also be included by TOE
  235  * which needs to be kept ignorant of socket implementation details
  236  */
  237 #ifdef _SYS_SOCKETVAR_H_
  238 /*
  239  * The socket has not been marked as "do not offload"
  240  */
  241 #define SO_OFFLOADABLE(so)      (((so)->so_options & SO_NO_OFFLOAD) == 0)	/* true unless SO_NO_OFFLOAD is set on the socket */
  242 
  243 static __inline int
  244 tcp_output_connect(struct socket *so, struct sockaddr *nam)
  245 {
  246         struct tcpcb *tp = sototcpcb(so);
  247         int error;	/* assigned on every path before the return */
  248 
  249         /*
  250          * Try tcp_offload_connect() first.  If offload is disabled for
  251          * this socket, or the offload attempt returns an error, call
  252          * tcp_output() instead; its result replaces the offload error.
  253          */
  254 #ifndef TCP_OFFLOAD_DISABLE
  255         if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
  256 #endif	/* with TCP_OFFLOAD_DISABLE the tcp_output() call below is unconditional */
  257                 error = tcp_output(tp);
  258         return (error);
  259 }
  260 
  261 static __inline int
  262 tcp_output_send(struct tcpcb *tp)
  263 {
  264         /* Offloaded connections go to the driver's tu_send; all others to tcp_output(). */
  265 #ifndef TCP_OFFLOAD_DISABLE
  266         if (tp_offload(tp))
  267                 return (tp->t_tu->tu_send(tp));
  268 #endif
  269         return (tcp_output(tp));
  270 }
  271 
  272 static __inline int
  273 tcp_output_rcvd(struct tcpcb *tp)
  274 {
  275         /* Offloaded connections go to the driver's tu_rcvd; all others to tcp_output(). */
  276 #ifndef TCP_OFFLOAD_DISABLE
  277         if (tp_offload(tp))
  278                 return (tp->t_tu->tu_rcvd(tp));
  279 #endif
  280         return (tcp_output(tp));
  281 }
  282 
  283 static __inline int
  284 tcp_output_disconnect(struct tcpcb *tp)
  285 {
  286         /* Offloaded connections go to the driver's tu_disconnect; all others to tcp_output(). */
  287 #ifndef TCP_OFFLOAD_DISABLE
  288         if (tp_offload(tp))
  289                 return (tp->t_tu->tu_disconnect(tp));
  290 #endif
  291         return (tcp_output(tp));
  292 }
  293 
  294 static __inline int
  295 tcp_output_reset(struct tcpcb *tp)
  296 {
  297         /* Offloaded connections go to the driver's tu_reset; all others to tcp_output(). */
  298 #ifndef TCP_OFFLOAD_DISABLE
  299         if (tp_offload(tp))
  300                 return (tp->t_tu->tu_reset(tp));
  301 #endif
  302         return (tcp_output(tp));
  303 }
  304 
  305 static __inline void
  306 tcp_offload_detach(struct tcpcb *tp)
  307 {
  308         /* Calls the driver's tu_detach for offloaded connections; otherwise does nothing. */
  309 #ifndef TCP_OFFLOAD_DISABLE
  310         if (tp_offload(tp))
  311                 tp->t_tu->tu_detach(tp);
  312 #endif
  313 }
  314 
  315 static __inline void
  316 tcp_offload_listen_open(struct tcpcb *tp)
  317 {
  318         /* Invokes the tcp_offload_listen_start event unless SO_NO_OFFLOAD is set on the socket. */
  319 #ifndef TCP_OFFLOAD_DISABLE
  320         if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
  321                 EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
  322 #endif
  323 }
  324 
  325 static __inline void
  326 tcp_offload_listen_close(struct tcpcb *tp)
  327 {
  328         /* Invokes the tcp_offload_listen_stop event; SO_NO_OFFLOAD is not checked here. */
  329 #ifndef TCP_OFFLOAD_DISABLE
  330         EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
  331 #endif
  332 }
  333 #undef SO_OFFLOADABLE	/* helper macro is not left defined for includers */
  334 #endif /* _SYS_SOCKETVAR_H_ */
  335 #undef tp_offload	/* likewise only defined for use inside this header */
  336 
  337 void tcp_offload_twstart(struct tcpcb *tp);	/* NOTE(review): presumably time-wait entry for an offloaded connection - confirm */
  338 struct tcpcb *tcp_offload_close(struct tcpcb *tp);	/* NOTE(review): meaning of the returned tcpcb is not visible here - confirm */
  339 struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);	/* NOTE(review): presumably drops the connection with 'error' - confirm */
  340 
  341 #endif /* _NETINET_TCP_OFFLOAD_H_ */

Cache object: 5f695f93857dc781e96834eddad95b4d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.