The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_lro.c

Version: -  FREEBSD  -  FREEBSD11  -  FREEBSD10  -  FREEBSD9  -  FREEBSD92  -  FREEBSD91  -  FREEBSD90  -  FREEBSD8  -  FREEBSD82  -  FREEBSD81  -  FREEBSD80  -  FREEBSD7  -  FREEBSD74  -  FREEBSD73  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /******************************************************************************
    2 
    3 Copyright (c) 2007, Myricom Inc.
    4 Copyright (c) 2008, Intel Corporation.
    5 All rights reserved.
    6 
    7 Redistribution and use in source and binary forms, with or without
    8 modification, are permitted provided that the following conditions are met:
    9 
   10  1. Redistributions of source code must retain the above copyright notice,
   11     this list of conditions and the following disclaimer.
   12 
   13  2. Neither the name of the Myricom Inc, nor the names of its
   14     contributors may be used to endorse or promote products derived from
   15     this software without specific prior written permission.
   16 
   17  3. Neither the name of the Intel Corporation, nor the names of its
   18     contributors may be used to endorse or promote products derived from
   19     this software without specific prior written permission.
   20 
   21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   26 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   27 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   28 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   29 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   30 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   31 POSSIBILITY OF SUCH DAMAGE.
   32 
   33 $FreeBSD: releng/7.4/sys/netinet/tcp_lro.c 182924 2008-09-10 23:37:06Z kmacy $ 
   34 ***************************************************************************/
   35 
   36 #include <sys/param.h>
   37 #include <sys/systm.h>
   38 #include <sys/endian.h>
   39 #include <sys/mbuf.h>
   40 #include <sys/kernel.h>
   41 #include <sys/socket.h>
   42 
   43 #include <net/if.h>
   44 #include <net/ethernet.h>
   45 #include <net/if_media.h>
   46 
   47 #include <netinet/in_systm.h>
   48 #include <netinet/in.h>
   49 #include <netinet/ip.h>
   50 #include <netinet/tcp.h>
   51 #include <netinet/tcp_lro.h>
   52 
   53 #include <machine/bus.h>
   54 #include <machine/in_cksum.h>
   55 
   56 
   /*
    * Compute the folded 16-bit one's-complement sum of `len` bytes at `raw`.
    *
    * raw  start of the data; it is read as 16-bit words in memory order, with
    *      no byte swapping.
    * len  number of bytes to sum.  Any length is accepted: an odd trailing
    *      byte is summed as if it were followed by a zero byte, and a length
    *      of zero or less yields 0.
    *
    * Returns the sum with carries folded back in.  The result is NOT
    * complemented; the callers in this file apply `^ 0xffff` themselves.
    */
   static uint16_t do_csum_data(uint16_t *raw, int len)
   {
           uint32_t csum = 0;

           /*
            * Sum one 16-bit word per pass so that a length which is not a
            * multiple of 4 never reads beyond the caller's buffer.
            */
           while (len > 1) {
                   csum += *raw;
                   raw++;
                   len -= 2;
           }

           /* odd trailing byte: pad it with a zero byte to form a full word */
           if (len == 1) {
                   uint16_t last = 0;

                   *(uint8_t *)&last = *(const uint8_t *)raw;
                   csum += last;
           }

           /* fold twice: the first fold can itself produce a carry */
           csum = (csum >> 16) + (csum & 0xffff);
           csum = (csum >> 16) + (csum & 0xffff);
           return (uint16_t)csum;
   }
   72 
   73 /*
   74  * Allocate and init the LRO data structures
   75  */
   76 int
   77 tcp_lro_init(struct lro_ctrl *cntl)
   78 {
   79         struct lro_entry *lro;
   80         int i, error = 0;
   81 
   82         SLIST_INIT(&cntl->lro_free);
   83         SLIST_INIT(&cntl->lro_active);
   84 
   85         cntl->lro_bad_csum = 0;
   86         cntl->lro_queued = 0;
   87         cntl->lro_flushed = 0;
   88 
   89         for (i = 0; i < LRO_ENTRIES; i++) {
   90                 lro = (struct lro_entry *) malloc(sizeof (struct lro_entry),
   91                     M_DEVBUF, M_NOWAIT | M_ZERO);
   92                 if (lro == NULL) {
   93                         if (i == 0)
   94                                 error = ENOMEM;
   95                         break;
   96                 }
   97                 cntl->lro_cnt = i;
   98                 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
   99         }
  100 
  101         return (error);
  102 }
  103 
  104 void
  105 tcp_lro_free(struct lro_ctrl *cntl)
  106 {
  107         struct lro_entry *entry;
  108 
  109         while (!SLIST_EMPTY(&cntl->lro_free)) {
  110                 entry = SLIST_FIRST(&cntl->lro_free);
  111                 SLIST_REMOVE_HEAD(&cntl->lro_free, next);
  112                 free(entry, M_DEVBUF);
  113         }
  114 }
  115 
  116 void
  117 tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro)
  118 {
  119         struct ifnet *ifp;
  120         struct ip *ip;
  121         struct tcphdr *tcp;
  122         uint32_t *ts_ptr;
  123         uint32_t tcplen, tcp_csum;
  124 
  125 
  126         if (lro->append_cnt) {
  127                 /* incorporate the new len into the ip header and
  128                  * re-calculate the checksum */
  129                 ip = lro->ip;
  130                 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
  131                 ip->ip_sum = 0;
  132                 ip->ip_sum = 0xffff ^ 
  133                         do_csum_data((uint16_t*)ip,
  134                                               sizeof (*ip));
  135 
  136                 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
  137                         CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  138                 lro->m_head->m_pkthdr.csum_data = 0xffff;
  139                 lro->m_head->m_pkthdr.len = lro->len;
  140 
  141                 /* incorporate the latest ack into the tcp header */
  142                 tcp = (struct tcphdr *) (ip + 1);
  143                 tcp->th_ack = lro->ack_seq;
  144                 tcp->th_win = lro->window;
  145                 /* incorporate latest timestamp into the tcp header */
  146                 if (lro->timestamp) {
  147                         ts_ptr = (uint32_t *)(tcp + 1);
  148                         ts_ptr[1] = htonl(lro->tsval);
  149                         ts_ptr[2] = lro->tsecr;
  150                 }
  151                 /* 
  152                  * update checksum in tcp header by re-calculating the
  153                  * tcp pseudoheader checksum, and adding it to the checksum
  154                  * of the tcp payload data 
  155                  */
  156                 tcp->th_sum = 0;
  157                 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
  158                 tcp_csum = lro->data_csum;
  159                 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
  160                                       htons(tcplen + IPPROTO_TCP));
  161                 tcp_csum += do_csum_data((uint16_t*)tcp,
  162                                                   tcp->th_off << 2);
  163                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
  164                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
  165                 tcp->th_sum = 0xffff ^ tcp_csum;
  166         }
  167         ifp = cntl->ifp;
  168         (*ifp->if_input)(cntl->ifp, lro->m_head);
  169         cntl->lro_queued += lro->append_cnt + 1;
  170         cntl->lro_flushed++;
  171         lro->m_head = NULL;
  172         lro->timestamp = 0;
  173         lro->append_cnt = 0;
  174         SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
  175 }
  176 
  177 int
  178 tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum)
  179 {
  180         struct ether_header *eh;
  181         struct ip *ip;
  182         struct tcphdr *tcp;
  183         uint32_t *ts_ptr;
  184         struct mbuf *m_nxt, *m_tail;
  185         struct lro_entry *lro;
  186         int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
  187         int opt_bytes, trim, csum_flags;
  188         uint32_t seq, tmp_csum, device_mtu;
  189 
  190 
  191         eh = mtod(m_head, struct ether_header *);
  192         if (eh->ether_type != htons(ETHERTYPE_IP))
  193                 return 1;
  194         ip = (struct ip *) (eh + 1);
  195         if (ip->ip_p != IPPROTO_TCP)
  196                 return 1;
  197         
  198         /* ensure there are no options */
  199         if ((ip->ip_hl << 2) != sizeof (*ip))
  200                 return -1;
  201 
  202         /* .. and the packet is not fragmented */
  203         if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
  204                 return -1;
  205 
  206         /* verify that the IP header checksum is correct */
  207         csum_flags = m_head->m_pkthdr.csum_flags;
  208         if (csum_flags & CSUM_IP_CHECKED) {
  209                 if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
  210                         cntl->lro_bad_csum++;
  211                         return -1;
  212                 }
  213         } else {
  214                 tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip));
  215                 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
  216                         cntl->lro_bad_csum++;
  217                         return -1;
  218                 }
  219         }
  220         
  221         /* find the TCP header */
  222         tcp = (struct tcphdr *) (ip + 1);
  223 
  224         /* Get the TCP checksum if we dont have it */
  225         if (!csum)
  226                 csum = tcp->th_sum;
  227 
  228         /* ensure no bits set besides ack or psh */
  229         if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
  230                 return -1;
  231 
  232         /* check for timestamps. Since the only option we handle are
  233            timestamps, we only have to handle the simple case of
  234            aligned timestamps */
  235 
  236         opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
  237         tcp_hdr_len =  sizeof (*tcp) + opt_bytes;
  238         ts_ptr = (uint32_t *)(tcp + 1);
  239         if (opt_bytes != 0) {
  240                 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
  241                     (*ts_ptr !=  ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
  242                     TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
  243                         return -1;
  244         }
  245 
  246         ip_len = ntohs(ip->ip_len);
  247         tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
  248         
  249 
  250         /* 
  251          * If frame is padded beyond the end of the IP packet,
  252          * then we must trim the extra bytes off the end.
  253          */
  254         tot_len = m_head->m_pkthdr.len;
  255         trim = tot_len - (ip_len + ETHER_HDR_LEN);
  256         if (trim != 0) {
  257                 if (trim < 0) {
  258                         /* truncated packet */
  259                         return -1;
  260                 }
  261                 m_adj(m_head, -trim);
  262                 tot_len = m_head->m_pkthdr.len;
  263         }
  264 
  265         m_nxt = m_head;
  266         m_tail = NULL; /* -Wuninitialized */
  267         while (m_nxt != NULL) {
  268                 m_tail = m_nxt;
  269                 m_nxt = m_tail->m_next;
  270         }
  271 
  272         hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
  273         seq = ntohl(tcp->th_seq);
  274 
  275         SLIST_FOREACH(lro, &cntl->lro_active, next) {
  276                 if (lro->source_port == tcp->th_sport && 
  277                     lro->dest_port == tcp->th_dport &&
  278                     lro->source_ip == ip->ip_src.s_addr && 
  279                     lro->dest_ip == ip->ip_dst.s_addr) {
  280                         /* Try to append it */
  281 
  282                         if (__predict_false(seq != lro->next_seq)) {
  283                                 /* out of order packet */
  284                                 SLIST_REMOVE(&cntl->lro_active, lro,
  285                                              lro_entry, next);
  286                                 tcp_lro_flush(cntl, lro);
  287                                 return -1;
  288                         }
  289 
  290                         if (opt_bytes) {
  291                                 uint32_t tsval = ntohl(*(ts_ptr + 1));
  292                                 /* make sure timestamp values are increasing */
  293                                 if (__predict_false(lro->tsval > tsval || 
  294                                              *(ts_ptr + 2) == 0)) {
  295                                         return -1;
  296                                 }
  297                                 lro->tsval = tsval;
  298                                 lro->tsecr = *(ts_ptr + 2);
  299                         }
  300 
  301                         lro->next_seq += tcp_data_len;
  302                         lro->ack_seq = tcp->th_ack;
  303                         lro->window = tcp->th_win;
  304                         lro->append_cnt++;
  305                         if (tcp_data_len == 0) {
  306                                 m_freem(m_head);
  307                                 return 0;
  308                         }
  309                         /* subtract off the checksum of the tcp header
  310                          * from the hardware checksum, and add it to the
  311                          * stored tcp data checksum.  Byteswap the checksum
  312                          * if the total length so far is odd 
  313                          */
  314                         tmp_csum = do_csum_data((uint16_t*)tcp,
  315                                                          tcp_hdr_len);
  316                         csum = csum + (tmp_csum ^ 0xffff);
  317                         csum = (csum & 0xffff) + (csum >> 16);
  318                         csum = (csum & 0xffff) + (csum >> 16);
  319                         if (lro->len & 0x1) {
  320                                 /* Odd number of bytes so far, flip bytes */
  321                                 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
  322                         }
  323                         csum = csum + lro->data_csum;
  324                         csum = (csum & 0xffff) + (csum >> 16);
  325                         csum = (csum & 0xffff) + (csum >> 16);
  326                         lro->data_csum = csum;
  327 
  328                         lro->len += tcp_data_len;
  329 
  330                         /* adjust mbuf so that m->m_data points to
  331                            the first byte of the payload */
  332                         m_adj(m_head, hlen);
  333                         /* append mbuf chain */
  334                         lro->m_tail->m_next = m_head;
  335                         /* advance the last pointer */
  336                         lro->m_tail = m_tail;
  337                         /* flush packet if required */
  338                         device_mtu = cntl->ifp->if_mtu;
  339                         if (lro->len > (65535 - device_mtu)) {
  340                                 SLIST_REMOVE(&cntl->lro_active, lro,
  341                                              lro_entry, next);
  342                                 tcp_lro_flush(cntl, lro);
  343                         }
  344                         return 0;
  345                 }
  346         }
  347 
  348         if (SLIST_EMPTY(&cntl->lro_free))
  349             return -1;
  350 
  351         /* start a new chain */
  352         lro = SLIST_FIRST(&cntl->lro_free);
  353         SLIST_REMOVE_HEAD(&cntl->lro_free, next);
  354         SLIST_INSERT_HEAD(&cntl->lro_active, lro, next);
  355         lro->source_port = tcp->th_sport;
  356         lro->dest_port = tcp->th_dport;
  357         lro->source_ip = ip->ip_src.s_addr;
  358         lro->dest_ip = ip->ip_dst.s_addr;
  359         lro->next_seq = seq + tcp_data_len;
  360         lro->mss = tcp_data_len;
  361         lro->ack_seq = tcp->th_ack;
  362         lro->window = tcp->th_win;
  363 
  364         /* save the checksum of just the TCP payload by
  365          * subtracting off the checksum of the TCP header from
  366          * the entire hardware checksum 
  367          * Since IP header checksum is correct, checksum over
  368          * the IP header is -0.  Substracting -0 is unnecessary.
  369          */
  370         tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len);
  371         csum = csum + (tmp_csum ^ 0xffff);
  372         csum = (csum & 0xffff) + (csum >> 16);
  373         csum = (csum & 0xffff) + (csum >> 16);
  374         lro->data_csum = csum;
  375         
  376         lro->ip = ip;
  377         /* record timestamp if it is present */
  378         if (opt_bytes) {
  379                 lro->timestamp = 1;
  380                 lro->tsval = ntohl(*(ts_ptr + 1));
  381                 lro->tsecr = *(ts_ptr + 2);
  382         }
  383         lro->len = tot_len;
  384         lro->m_head = m_head;
  385         lro->m_tail = m_tail;
  386         return 0;
  387 }

Cache object: f3b28dc64bea29170ddd63a6e3b53170


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.