FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_lro.c
1 /******************************************************************************
2
3 Copyright (c) 2007, Myricom Inc.
4 Copyright (c) 2008, Intel Corporation.
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
16
17 3. Neither the name of the Intel Corporation, nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 POSSIBILITY OF SUCH DAMAGE.
32
33 $FreeBSD: releng/9.0/sys/netinet/tcp_lro.c 223797 2011-07-05 18:43:54Z cperciva $
34 ***************************************************************************/
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/endian.h>
39 #include <sys/mbuf.h>
40 #include <sys/kernel.h>
41 #include <sys/socket.h>
42
43 #include <net/if.h>
44 #include <net/ethernet.h>
45 #include <net/if_media.h>
46
47 #include <netinet/in_systm.h>
48 #include <netinet/in.h>
49 #include <netinet/ip.h>
50 #include <netinet/tcp.h>
51 #include <netinet/tcp_lro.h>
52
53 #include <machine/bus.h>
54 #include <machine/in_cksum.h>
55
56
/*
 * Sum `len` bytes starting at `raw` as 16-bit words and fold the carries
 * back in, returning the 16-bit ones'-complement sum (not inverted).
 *
 * Words are added exactly as they sit in memory; no byte swapping is done.
 * A trailing odd byte is treated as the first byte of a word whose second
 * byte is zero.  A `len` of zero or less yields 0.
 */
static uint16_t
do_csum_data(const uint16_t *raw, int len)
{
	uint32_t csum = 0;

	/*
	 * Walk one 16-bit word at a time so that a length which is not a
	 * multiple of four never causes a read past the end of the buffer.
	 */
	for (; len > 1; len -= 2)
		csum += *raw++;

	if (len == 1) {
		uint16_t last = 0;

		/* Pad the final byte with a zero byte, in memory order. */
		*(uint8_t *)&last = *(const uint8_t *)raw;
		csum += last;
	}

	/*
	 * The first fold leaves at most 0x1fffe, the second brings the
	 * value back into 16 bits.
	 */
	csum = (csum >> 16) + (csum & 0xffff);
	csum = (csum >> 16) + (csum & 0xffff);
	return ((uint16_t)csum);
}
72
73 /*
74 * Allocate and init the LRO data structures
75 */
76 int
77 tcp_lro_init(struct lro_ctrl *cntl)
78 {
79 struct lro_entry *lro;
80 int i, error = 0;
81
82 SLIST_INIT(&cntl->lro_free);
83 SLIST_INIT(&cntl->lro_active);
84
85 cntl->lro_bad_csum = 0;
86 cntl->lro_queued = 0;
87 cntl->lro_flushed = 0;
88
89 for (i = 0; i < LRO_ENTRIES; i++) {
90 lro = (struct lro_entry *) malloc(sizeof (struct lro_entry),
91 M_DEVBUF, M_NOWAIT | M_ZERO);
92 if (lro == NULL) {
93 if (i == 0)
94 error = ENOMEM;
95 break;
96 }
97 cntl->lro_cnt = i;
98 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
99 }
100
101 return (error);
102 }
103
104 void
105 tcp_lro_free(struct lro_ctrl *cntl)
106 {
107 struct lro_entry *entry;
108
109 while (!SLIST_EMPTY(&cntl->lro_free)) {
110 entry = SLIST_FIRST(&cntl->lro_free);
111 SLIST_REMOVE_HEAD(&cntl->lro_free, next);
112 free(entry, M_DEVBUF);
113 }
114 }
115
116 void
117 tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro)
118 {
119 struct ifnet *ifp;
120 struct ip *ip;
121 struct tcphdr *tcp;
122 uint32_t *ts_ptr;
123 uint32_t tcplen, tcp_csum;
124
125
126 if (lro->append_cnt) {
127 /* incorporate the new len into the ip header and
128 * re-calculate the checksum */
129 ip = lro->ip;
130 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
131 ip->ip_sum = 0;
132 ip->ip_sum = 0xffff ^
133 do_csum_data((uint16_t*)ip,
134 sizeof (*ip));
135
136 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
137 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
138 lro->m_head->m_pkthdr.csum_data = 0xffff;
139 lro->m_head->m_pkthdr.len = lro->len;
140
141 /* incorporate the latest ack into the tcp header */
142 tcp = (struct tcphdr *) (ip + 1);
143 tcp->th_ack = lro->ack_seq;
144 tcp->th_win = lro->window;
145 /* incorporate latest timestamp into the tcp header */
146 if (lro->timestamp) {
147 ts_ptr = (uint32_t *)(tcp + 1);
148 ts_ptr[1] = htonl(lro->tsval);
149 ts_ptr[2] = lro->tsecr;
150 }
151 /*
152 * update checksum in tcp header by re-calculating the
153 * tcp pseudoheader checksum, and adding it to the checksum
154 * of the tcp payload data
155 */
156 tcp->th_sum = 0;
157 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
158 tcp_csum = lro->data_csum;
159 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
160 htons(tcplen + IPPROTO_TCP));
161 tcp_csum += do_csum_data((uint16_t*)tcp,
162 tcp->th_off << 2);
163 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
164 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
165 tcp->th_sum = 0xffff ^ tcp_csum;
166 }
167 ifp = cntl->ifp;
168 (*ifp->if_input)(cntl->ifp, lro->m_head);
169 cntl->lro_queued += lro->append_cnt + 1;
170 cntl->lro_flushed++;
171 lro->m_head = NULL;
172 lro->timestamp = 0;
173 lro->append_cnt = 0;
174 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
175 }
176
177 int
178 tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum)
179 {
180 struct ether_header *eh;
181 struct ip *ip;
182 struct tcphdr *tcp;
183 uint32_t *ts_ptr;
184 struct mbuf *m_nxt, *m_tail;
185 struct lro_entry *lro;
186 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
187 int opt_bytes, trim, csum_flags;
188 uint32_t seq, tmp_csum, device_mtu;
189
190
191 eh = mtod(m_head, struct ether_header *);
192 if (eh->ether_type != htons(ETHERTYPE_IP))
193 return 1;
194 ip = (struct ip *) (eh + 1);
195 if (ip->ip_p != IPPROTO_TCP)
196 return 1;
197
198 /* ensure there are no options */
199 if ((ip->ip_hl << 2) != sizeof (*ip))
200 return -1;
201
202 /* .. and the packet is not fragmented */
203 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
204 return -1;
205
206 /* verify that the IP header checksum is correct */
207 csum_flags = m_head->m_pkthdr.csum_flags;
208 if (csum_flags & CSUM_IP_CHECKED) {
209 if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
210 cntl->lro_bad_csum++;
211 return -1;
212 }
213 } else {
214 tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip));
215 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
216 cntl->lro_bad_csum++;
217 return -1;
218 }
219 }
220
221 /* find the TCP header */
222 tcp = (struct tcphdr *) (ip + 1);
223
224 /* Get the TCP checksum if we dont have it */
225 if (!csum)
226 csum = tcp->th_sum;
227
228 /* ensure no bits set besides ack or psh */
229 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
230 return -1;
231
232 /* check for timestamps. Since the only option we handle are
233 timestamps, we only have to handle the simple case of
234 aligned timestamps */
235
236 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
237 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
238 ts_ptr = (uint32_t *)(tcp + 1);
239 if (opt_bytes != 0) {
240 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
241 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
242 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
243 return -1;
244 }
245
246 ip_len = ntohs(ip->ip_len);
247 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
248
249
250 /*
251 * If frame is padded beyond the end of the IP packet,
252 * then we must trim the extra bytes off the end.
253 */
254 tot_len = m_head->m_pkthdr.len;
255 trim = tot_len - (ip_len + ETHER_HDR_LEN);
256 if (trim != 0) {
257 if (trim < 0) {
258 /* truncated packet */
259 return -1;
260 }
261 m_adj(m_head, -trim);
262 tot_len = m_head->m_pkthdr.len;
263 }
264
265 m_nxt = m_head;
266 m_tail = NULL; /* -Wuninitialized */
267 while (m_nxt != NULL) {
268 m_tail = m_nxt;
269 m_nxt = m_tail->m_next;
270 }
271
272 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
273 seq = ntohl(tcp->th_seq);
274
275 SLIST_FOREACH(lro, &cntl->lro_active, next) {
276 if (lro->source_port == tcp->th_sport &&
277 lro->dest_port == tcp->th_dport &&
278 lro->source_ip == ip->ip_src.s_addr &&
279 lro->dest_ip == ip->ip_dst.s_addr) {
280 /* Flush now if appending will result in overflow. */
281 if (lro->len > (65535 - tcp_data_len)) {
282 SLIST_REMOVE(&cntl->lro_active, lro,
283 lro_entry, next);
284 tcp_lro_flush(cntl, lro);
285 break;
286 }
287
288 /* Try to append it */
289
290 if (__predict_false(seq != lro->next_seq ||
291 (tcp_data_len == 0 &&
292 lro->ack_seq == tcp->th_ack))) {
293 /* out of order packet or dup ack */
294 SLIST_REMOVE(&cntl->lro_active, lro,
295 lro_entry, next);
296 tcp_lro_flush(cntl, lro);
297 return -1;
298 }
299
300 if (opt_bytes) {
301 uint32_t tsval = ntohl(*(ts_ptr + 1));
302 /* make sure timestamp values are increasing */
303 if (__predict_false(lro->tsval > tsval ||
304 *(ts_ptr + 2) == 0)) {
305 return -1;
306 }
307 lro->tsval = tsval;
308 lro->tsecr = *(ts_ptr + 2);
309 }
310
311 lro->next_seq += tcp_data_len;
312 lro->ack_seq = tcp->th_ack;
313 lro->window = tcp->th_win;
314 lro->append_cnt++;
315 if (tcp_data_len == 0) {
316 m_freem(m_head);
317 return 0;
318 }
319 /* subtract off the checksum of the tcp header
320 * from the hardware checksum, and add it to the
321 * stored tcp data checksum. Byteswap the checksum
322 * if the total length so far is odd
323 */
324 tmp_csum = do_csum_data((uint16_t*)tcp,
325 tcp_hdr_len);
326 csum = csum + (tmp_csum ^ 0xffff);
327 csum = (csum & 0xffff) + (csum >> 16);
328 csum = (csum & 0xffff) + (csum >> 16);
329 if (lro->len & 0x1) {
330 /* Odd number of bytes so far, flip bytes */
331 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
332 }
333 csum = csum + lro->data_csum;
334 csum = (csum & 0xffff) + (csum >> 16);
335 csum = (csum & 0xffff) + (csum >> 16);
336 lro->data_csum = csum;
337
338 lro->len += tcp_data_len;
339
340 /* adjust mbuf so that m->m_data points to
341 the first byte of the payload */
342 m_adj(m_head, hlen);
343 /* append mbuf chain */
344 lro->m_tail->m_next = m_head;
345 /* advance the last pointer */
346 lro->m_tail = m_tail;
347 /* flush packet if required */
348 device_mtu = cntl->ifp->if_mtu;
349 if (lro->len > (65535 - device_mtu)) {
350 SLIST_REMOVE(&cntl->lro_active, lro,
351 lro_entry, next);
352 tcp_lro_flush(cntl, lro);
353 }
354 return 0;
355 }
356 }
357
358 if (SLIST_EMPTY(&cntl->lro_free))
359 return -1;
360
361 /* start a new chain */
362 lro = SLIST_FIRST(&cntl->lro_free);
363 SLIST_REMOVE_HEAD(&cntl->lro_free, next);
364 SLIST_INSERT_HEAD(&cntl->lro_active, lro, next);
365 lro->source_port = tcp->th_sport;
366 lro->dest_port = tcp->th_dport;
367 lro->source_ip = ip->ip_src.s_addr;
368 lro->dest_ip = ip->ip_dst.s_addr;
369 lro->next_seq = seq + tcp_data_len;
370 lro->mss = tcp_data_len;
371 lro->ack_seq = tcp->th_ack;
372 lro->window = tcp->th_win;
373
374 /* save the checksum of just the TCP payload by
375 * subtracting off the checksum of the TCP header from
376 * the entire hardware checksum
377 * Since IP header checksum is correct, checksum over
378 * the IP header is -0. Substracting -0 is unnecessary.
379 */
380 tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len);
381 csum = csum + (tmp_csum ^ 0xffff);
382 csum = (csum & 0xffff) + (csum >> 16);
383 csum = (csum & 0xffff) + (csum >> 16);
384 lro->data_csum = csum;
385
386 lro->ip = ip;
387 /* record timestamp if it is present */
388 if (opt_bytes) {
389 lro->timestamp = 1;
390 lro->tsval = ntohl(*(ts_ptr + 1));
391 lro->tsecr = *(ts_ptr + 2);
392 }
393 lro->len = tot_len;
394 lro->m_head = m_head;
395 lro->m_tail = m_tail;
396 return 0;
397 }
Cache object: a54e9bee528bc53f1b897a5c7ea100a9
|