The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_lro.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2007, Myricom Inc.
    3  * Copyright (c) 2008, Intel Corporation.
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  *
   27  * $FreeBSD: releng/8.4/sys/netinet/tcp_lro.c 236092 2012-05-26 10:24:35Z bz $ 
   28  */
   29 
   30 #include <sys/param.h>
   31 #include <sys/systm.h>
   32 #include <sys/endian.h>
   33 #include <sys/mbuf.h>
   34 #include <sys/kernel.h>
   35 #include <sys/socket.h>
   36 
   37 #include <net/if.h>
   38 #include <net/ethernet.h>
   39 #include <net/if_media.h>
   40 
   41 #include <netinet/in_systm.h>
   42 #include <netinet/in.h>
   43 #include <netinet/ip.h>
   44 #include <netinet/tcp.h>
   45 #include <netinet/tcp_lro.h>
   46 
   47 #include <machine/bus.h>
   48 #include <machine/in_cksum.h>
   49 
   50 
   51 static uint16_t do_csum_data(uint16_t *raw, int len)
   52 {
   53         uint32_t csum;
   54         csum = 0;
   55         while (len > 0) {
   56                 csum += *raw;
   57                 raw++;
   58                 csum += *raw;
   59                 raw++;
   60                 len -= 4;
   61         }
   62         csum = (csum >> 16) + (csum & 0xffff);
   63         csum = (csum >> 16) + (csum & 0xffff);
   64         return (uint16_t)csum;
   65 }
   66 
   67 /*
   68  * Allocate and init the LRO data structures
   69  */
   70 int
   71 tcp_lro_init(struct lro_ctrl *cntl)
   72 {
   73         struct lro_entry *lro;
   74         int i, error = 0;
   75 
   76         SLIST_INIT(&cntl->lro_free);
   77         SLIST_INIT(&cntl->lro_active);
   78 
   79         cntl->lro_bad_csum = 0;
   80         cntl->lro_queued = 0;
   81         cntl->lro_flushed = 0;
   82 
   83         for (i = 0; i < LRO_ENTRIES; i++) {
   84                 lro = (struct lro_entry *) malloc(sizeof (struct lro_entry),
   85                     M_DEVBUF, M_NOWAIT | M_ZERO);
   86                 if (lro == NULL) {
   87                         if (i == 0)
   88                                 error = ENOMEM;
   89                         break;
   90                 }
   91                 cntl->lro_cnt = i;
   92                 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
   93         }
   94 
   95         return (error);
   96 }
   97 
   98 void
   99 tcp_lro_free(struct lro_ctrl *cntl)
  100 {
  101         struct lro_entry *entry;
  102 
  103         while (!SLIST_EMPTY(&cntl->lro_free)) {
  104                 entry = SLIST_FIRST(&cntl->lro_free);
  105                 SLIST_REMOVE_HEAD(&cntl->lro_free, next);
  106                 free(entry, M_DEVBUF);
  107         }
  108 }
  109 
  110 void
  111 tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro)
  112 {
  113         struct ifnet *ifp;
  114         struct ip *ip;
  115         struct tcphdr *tcp;
  116         uint32_t *ts_ptr;
  117         uint32_t tcplen, tcp_csum;
  118 
  119 
  120         if (lro->append_cnt) {
  121                 /* incorporate the new len into the ip header and
  122                  * re-calculate the checksum */
  123                 ip = lro->ip;
  124                 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
  125                 ip->ip_sum = 0;
  126                 ip->ip_sum = 0xffff ^ 
  127                         do_csum_data((uint16_t*)ip,
  128                                               sizeof (*ip));
  129 
  130                 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
  131                         CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  132                 lro->m_head->m_pkthdr.csum_data = 0xffff;
  133                 lro->m_head->m_pkthdr.len = lro->len;
  134 
  135                 /* incorporate the latest ack into the tcp header */
  136                 tcp = (struct tcphdr *) (ip + 1);
  137                 tcp->th_ack = lro->ack_seq;
  138                 tcp->th_win = lro->window;
  139                 /* incorporate latest timestamp into the tcp header */
  140                 if (lro->timestamp) {
  141                         ts_ptr = (uint32_t *)(tcp + 1);
  142                         ts_ptr[1] = htonl(lro->tsval);
  143                         ts_ptr[2] = lro->tsecr;
  144                 }
  145                 /* 
  146                  * update checksum in tcp header by re-calculating the
  147                  * tcp pseudoheader checksum, and adding it to the checksum
  148                  * of the tcp payload data 
  149                  */
  150                 tcp->th_sum = 0;
  151                 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
  152                 tcp_csum = lro->data_csum;
  153                 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
  154                                       htons(tcplen + IPPROTO_TCP));
  155                 tcp_csum += do_csum_data((uint16_t*)tcp,
  156                                                   tcp->th_off << 2);
  157                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
  158                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
  159                 tcp->th_sum = 0xffff ^ tcp_csum;
  160         }
  161         ifp = cntl->ifp;
  162         (*ifp->if_input)(cntl->ifp, lro->m_head);
  163         cntl->lro_queued += lro->append_cnt + 1;
  164         cntl->lro_flushed++;
  165         lro->m_head = NULL;
  166         lro->timestamp = 0;
  167         lro->append_cnt = 0;
  168         SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
  169 }
  170 
  171 int
  172 tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum)
  173 {
  174         struct ether_header *eh;
  175         struct ip *ip;
  176         struct tcphdr *tcp;
  177         uint32_t *ts_ptr;
  178         struct mbuf *m_nxt, *m_tail;
  179         struct lro_entry *lro;
  180         int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
  181         int opt_bytes, trim, csum_flags;
  182         uint32_t seq, tmp_csum, device_mtu;
  183 
  184 
  185         eh = mtod(m_head, struct ether_header *);
  186         if (eh->ether_type != htons(ETHERTYPE_IP))
  187                 return 1;
  188         ip = (struct ip *) (eh + 1);
  189         if (ip->ip_p != IPPROTO_TCP)
  190                 return 1;
  191         
  192         /* ensure there are no options */
  193         if ((ip->ip_hl << 2) != sizeof (*ip))
  194                 return -1;
  195 
  196         /* .. and the packet is not fragmented */
  197         if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
  198                 return -1;
  199 
  200         /* verify that the IP header checksum is correct */
  201         csum_flags = m_head->m_pkthdr.csum_flags;
  202         if (csum_flags & CSUM_IP_CHECKED) {
  203                 if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
  204                         cntl->lro_bad_csum++;
  205                         return -1;
  206                 }
  207         } else {
  208                 tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip));
  209                 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
  210                         cntl->lro_bad_csum++;
  211                         return -1;
  212                 }
  213         }
  214         
  215         /* find the TCP header */
  216         tcp = (struct tcphdr *) (ip + 1);
  217 
  218         /* Get the TCP checksum if we dont have it */
  219         if (!csum)
  220                 csum = tcp->th_sum;
  221 
  222         /* ensure no bits set besides ack or psh */
  223         if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
  224                 return -1;
  225 
  226         /* check for timestamps. Since the only option we handle are
  227            timestamps, we only have to handle the simple case of
  228            aligned timestamps */
  229 
  230         opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
  231         tcp_hdr_len =  sizeof (*tcp) + opt_bytes;
  232         ts_ptr = (uint32_t *)(tcp + 1);
  233         if (opt_bytes != 0) {
  234                 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
  235                     (*ts_ptr !=  ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
  236                     TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
  237                         return -1;
  238         }
  239 
  240         ip_len = ntohs(ip->ip_len);
  241         tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
  242         
  243 
  244         /* 
  245          * If frame is padded beyond the end of the IP packet,
  246          * then we must trim the extra bytes off the end.
  247          */
  248         tot_len = m_head->m_pkthdr.len;
  249         trim = tot_len - (ip_len + ETHER_HDR_LEN);
  250         if (trim != 0) {
  251                 if (trim < 0) {
  252                         /* truncated packet */
  253                         return -1;
  254                 }
  255                 m_adj(m_head, -trim);
  256                 tot_len = m_head->m_pkthdr.len;
  257         }
  258 
  259         m_nxt = m_head;
  260         m_tail = NULL; /* -Wuninitialized */
  261         while (m_nxt != NULL) {
  262                 m_tail = m_nxt;
  263                 m_nxt = m_tail->m_next;
  264         }
  265 
  266         hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
  267         seq = ntohl(tcp->th_seq);
  268 
  269         SLIST_FOREACH(lro, &cntl->lro_active, next) {
  270                 if (lro->source_port == tcp->th_sport && 
  271                     lro->dest_port == tcp->th_dport &&
  272                     lro->source_ip == ip->ip_src.s_addr && 
  273                     lro->dest_ip == ip->ip_dst.s_addr) {
  274                         /* Try to append it */
  275 
  276                         if (__predict_false(seq != lro->next_seq)) {
  277                                 /* out of order packet */
  278                                 SLIST_REMOVE(&cntl->lro_active, lro,
  279                                              lro_entry, next);
  280                                 tcp_lro_flush(cntl, lro);
  281                                 return -1;
  282                         }
  283 
  284                         if (opt_bytes) {
  285                                 uint32_t tsval = ntohl(*(ts_ptr + 1));
  286                                 /* make sure timestamp values are increasing */
  287                                 if (__predict_false(lro->tsval > tsval || 
  288                                              *(ts_ptr + 2) == 0)) {
  289                                         return -1;
  290                                 }
  291                                 lro->tsval = tsval;
  292                                 lro->tsecr = *(ts_ptr + 2);
  293                         }
  294 
  295                         lro->next_seq += tcp_data_len;
  296                         lro->ack_seq = tcp->th_ack;
  297                         lro->window = tcp->th_win;
  298                         lro->append_cnt++;
  299                         if (tcp_data_len == 0) {
  300                                 m_freem(m_head);
  301                                 return 0;
  302                         }
  303                         /* subtract off the checksum of the tcp header
  304                          * from the hardware checksum, and add it to the
  305                          * stored tcp data checksum.  Byteswap the checksum
  306                          * if the total length so far is odd 
  307                          */
  308                         tmp_csum = do_csum_data((uint16_t*)tcp,
  309                                                          tcp_hdr_len);
  310                         csum = csum + (tmp_csum ^ 0xffff);
  311                         csum = (csum & 0xffff) + (csum >> 16);
  312                         csum = (csum & 0xffff) + (csum >> 16);
  313                         if (lro->len & 0x1) {
  314                                 /* Odd number of bytes so far, flip bytes */
  315                                 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
  316                         }
  317                         csum = csum + lro->data_csum;
  318                         csum = (csum & 0xffff) + (csum >> 16);
  319                         csum = (csum & 0xffff) + (csum >> 16);
  320                         lro->data_csum = csum;
  321 
  322                         lro->len += tcp_data_len;
  323 
  324                         /* adjust mbuf so that m->m_data points to
  325                            the first byte of the payload */
  326                         m_adj(m_head, hlen);
  327                         /* append mbuf chain */
  328                         lro->m_tail->m_next = m_head;
  329                         /* advance the last pointer */
  330                         lro->m_tail = m_tail;
  331                         /* flush packet if required */
  332                         device_mtu = cntl->ifp->if_mtu;
  333                         if (lro->len > (65535 - device_mtu)) {
  334                                 SLIST_REMOVE(&cntl->lro_active, lro,
  335                                              lro_entry, next);
  336                                 tcp_lro_flush(cntl, lro);
  337                         }
  338                         return 0;
  339                 }
  340         }
  341 
  342         if (SLIST_EMPTY(&cntl->lro_free))
  343             return -1;
  344 
  345         /* start a new chain */
  346         lro = SLIST_FIRST(&cntl->lro_free);
  347         SLIST_REMOVE_HEAD(&cntl->lro_free, next);
  348         SLIST_INSERT_HEAD(&cntl->lro_active, lro, next);
  349         lro->source_port = tcp->th_sport;
  350         lro->dest_port = tcp->th_dport;
  351         lro->source_ip = ip->ip_src.s_addr;
  352         lro->dest_ip = ip->ip_dst.s_addr;
  353         lro->next_seq = seq + tcp_data_len;
  354         lro->mss = tcp_data_len;
  355         lro->ack_seq = tcp->th_ack;
  356         lro->window = tcp->th_win;
  357 
  358         /* save the checksum of just the TCP payload by
  359          * subtracting off the checksum of the TCP header from
  360          * the entire hardware checksum 
  361          * Since IP header checksum is correct, checksum over
  362          * the IP header is -0.  Substracting -0 is unnecessary.
  363          */
  364         tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len);
  365         csum = csum + (tmp_csum ^ 0xffff);
  366         csum = (csum & 0xffff) + (csum >> 16);
  367         csum = (csum & 0xffff) + (csum >> 16);
  368         lro->data_csum = csum;
  369         
  370         lro->ip = ip;
  371         /* record timestamp if it is present */
  372         if (opt_bytes) {
  373                 lro->timestamp = 1;
  374                 lro->tsval = ntohl(*(ts_ptr + 1));
  375                 lro->tsecr = *(ts_ptr + 2);
  376         }
  377         lro->len = tot_len;
  378         lro->m_head = m_head;
  379         lro->m_tail = m_tail;
  380         return 0;
  381 }

Cache object: a77d100487a1e17c1ea9a1533533d3d3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.