FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_lro.c
1 /*-
2 * Copyright (c) 2007, Myricom Inc.
3 * Copyright (c) 2008, Intel Corporation.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD: releng/8.4/sys/netinet/tcp_lro.c 236092 2012-05-26 10:24:35Z bz $
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/endian.h>
33 #include <sys/mbuf.h>
34 #include <sys/kernel.h>
35 #include <sys/socket.h>
36
37 #include <net/if.h>
38 #include <net/ethernet.h>
39 #include <net/if_media.h>
40
41 #include <netinet/in_systm.h>
42 #include <netinet/in.h>
43 #include <netinet/ip.h>
44 #include <netinet/tcp.h>
45 #include <netinet/tcp_lro.h>
46
47 #include <machine/bus.h>
48 #include <machine/in_cksum.h>
49
50
/*
 * Sum a buffer of 16-bit words and fold the total down to 16 bits.
 *
 * The buffer is consumed two words (4 bytes) per pass for as long as the
 * remaining length is positive, so a length that is not a multiple of 4 is
 * effectively rounded up.  The callers visible in this file pass header
 * lengths derived from "<< 2" fields, i.e. multiples of 4.
 *
 * The value returned is the folded sum itself, not its complement.
 */
static uint16_t
do_csum_data(uint16_t *raw, int len)
{
	uint32_t sum = 0;
	int left;

	for (left = len; left > 0; left -= 4) {
		sum += raw[0];
		sum += raw[1];
		raw += 2;
	}

	/* Fold twice: the first fold can itself carry into bit 16. */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);

	return ((uint16_t)sum);
}
66
67 /*
68 * Allocate and init the LRO data structures
69 */
70 int
71 tcp_lro_init(struct lro_ctrl *cntl)
72 {
73 struct lro_entry *lro;
74 int i, error = 0;
75
76 SLIST_INIT(&cntl->lro_free);
77 SLIST_INIT(&cntl->lro_active);
78
79 cntl->lro_bad_csum = 0;
80 cntl->lro_queued = 0;
81 cntl->lro_flushed = 0;
82
83 for (i = 0; i < LRO_ENTRIES; i++) {
84 lro = (struct lro_entry *) malloc(sizeof (struct lro_entry),
85 M_DEVBUF, M_NOWAIT | M_ZERO);
86 if (lro == NULL) {
87 if (i == 0)
88 error = ENOMEM;
89 break;
90 }
91 cntl->lro_cnt = i;
92 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
93 }
94
95 return (error);
96 }
97
98 void
99 tcp_lro_free(struct lro_ctrl *cntl)
100 {
101 struct lro_entry *entry;
102
103 while (!SLIST_EMPTY(&cntl->lro_free)) {
104 entry = SLIST_FIRST(&cntl->lro_free);
105 SLIST_REMOVE_HEAD(&cntl->lro_free, next);
106 free(entry, M_DEVBUF);
107 }
108 }
109
110 void
111 tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro)
112 {
113 struct ifnet *ifp;
114 struct ip *ip;
115 struct tcphdr *tcp;
116 uint32_t *ts_ptr;
117 uint32_t tcplen, tcp_csum;
118
119
120 if (lro->append_cnt) {
121 /* incorporate the new len into the ip header and
122 * re-calculate the checksum */
123 ip = lro->ip;
124 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
125 ip->ip_sum = 0;
126 ip->ip_sum = 0xffff ^
127 do_csum_data((uint16_t*)ip,
128 sizeof (*ip));
129
130 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
131 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
132 lro->m_head->m_pkthdr.csum_data = 0xffff;
133 lro->m_head->m_pkthdr.len = lro->len;
134
135 /* incorporate the latest ack into the tcp header */
136 tcp = (struct tcphdr *) (ip + 1);
137 tcp->th_ack = lro->ack_seq;
138 tcp->th_win = lro->window;
139 /* incorporate latest timestamp into the tcp header */
140 if (lro->timestamp) {
141 ts_ptr = (uint32_t *)(tcp + 1);
142 ts_ptr[1] = htonl(lro->tsval);
143 ts_ptr[2] = lro->tsecr;
144 }
145 /*
146 * update checksum in tcp header by re-calculating the
147 * tcp pseudoheader checksum, and adding it to the checksum
148 * of the tcp payload data
149 */
150 tcp->th_sum = 0;
151 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
152 tcp_csum = lro->data_csum;
153 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
154 htons(tcplen + IPPROTO_TCP));
155 tcp_csum += do_csum_data((uint16_t*)tcp,
156 tcp->th_off << 2);
157 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
158 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
159 tcp->th_sum = 0xffff ^ tcp_csum;
160 }
161 ifp = cntl->ifp;
162 (*ifp->if_input)(cntl->ifp, lro->m_head);
163 cntl->lro_queued += lro->append_cnt + 1;
164 cntl->lro_flushed++;
165 lro->m_head = NULL;
166 lro->timestamp = 0;
167 lro->append_cnt = 0;
168 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
169 }
170
171 int
172 tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum)
173 {
174 struct ether_header *eh;
175 struct ip *ip;
176 struct tcphdr *tcp;
177 uint32_t *ts_ptr;
178 struct mbuf *m_nxt, *m_tail;
179 struct lro_entry *lro;
180 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
181 int opt_bytes, trim, csum_flags;
182 uint32_t seq, tmp_csum, device_mtu;
183
184
185 eh = mtod(m_head, struct ether_header *);
186 if (eh->ether_type != htons(ETHERTYPE_IP))
187 return 1;
188 ip = (struct ip *) (eh + 1);
189 if (ip->ip_p != IPPROTO_TCP)
190 return 1;
191
192 /* ensure there are no options */
193 if ((ip->ip_hl << 2) != sizeof (*ip))
194 return -1;
195
196 /* .. and the packet is not fragmented */
197 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
198 return -1;
199
200 /* verify that the IP header checksum is correct */
201 csum_flags = m_head->m_pkthdr.csum_flags;
202 if (csum_flags & CSUM_IP_CHECKED) {
203 if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
204 cntl->lro_bad_csum++;
205 return -1;
206 }
207 } else {
208 tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip));
209 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
210 cntl->lro_bad_csum++;
211 return -1;
212 }
213 }
214
215 /* find the TCP header */
216 tcp = (struct tcphdr *) (ip + 1);
217
218 /* Get the TCP checksum if we dont have it */
219 if (!csum)
220 csum = tcp->th_sum;
221
222 /* ensure no bits set besides ack or psh */
223 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
224 return -1;
225
226 /* check for timestamps. Since the only option we handle are
227 timestamps, we only have to handle the simple case of
228 aligned timestamps */
229
230 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
231 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
232 ts_ptr = (uint32_t *)(tcp + 1);
233 if (opt_bytes != 0) {
234 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
235 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
236 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
237 return -1;
238 }
239
240 ip_len = ntohs(ip->ip_len);
241 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
242
243
244 /*
245 * If frame is padded beyond the end of the IP packet,
246 * then we must trim the extra bytes off the end.
247 */
248 tot_len = m_head->m_pkthdr.len;
249 trim = tot_len - (ip_len + ETHER_HDR_LEN);
250 if (trim != 0) {
251 if (trim < 0) {
252 /* truncated packet */
253 return -1;
254 }
255 m_adj(m_head, -trim);
256 tot_len = m_head->m_pkthdr.len;
257 }
258
259 m_nxt = m_head;
260 m_tail = NULL; /* -Wuninitialized */
261 while (m_nxt != NULL) {
262 m_tail = m_nxt;
263 m_nxt = m_tail->m_next;
264 }
265
266 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
267 seq = ntohl(tcp->th_seq);
268
269 SLIST_FOREACH(lro, &cntl->lro_active, next) {
270 if (lro->source_port == tcp->th_sport &&
271 lro->dest_port == tcp->th_dport &&
272 lro->source_ip == ip->ip_src.s_addr &&
273 lro->dest_ip == ip->ip_dst.s_addr) {
274 /* Try to append it */
275
276 if (__predict_false(seq != lro->next_seq)) {
277 /* out of order packet */
278 SLIST_REMOVE(&cntl->lro_active, lro,
279 lro_entry, next);
280 tcp_lro_flush(cntl, lro);
281 return -1;
282 }
283
284 if (opt_bytes) {
285 uint32_t tsval = ntohl(*(ts_ptr + 1));
286 /* make sure timestamp values are increasing */
287 if (__predict_false(lro->tsval > tsval ||
288 *(ts_ptr + 2) == 0)) {
289 return -1;
290 }
291 lro->tsval = tsval;
292 lro->tsecr = *(ts_ptr + 2);
293 }
294
295 lro->next_seq += tcp_data_len;
296 lro->ack_seq = tcp->th_ack;
297 lro->window = tcp->th_win;
298 lro->append_cnt++;
299 if (tcp_data_len == 0) {
300 m_freem(m_head);
301 return 0;
302 }
303 /* subtract off the checksum of the tcp header
304 * from the hardware checksum, and add it to the
305 * stored tcp data checksum. Byteswap the checksum
306 * if the total length so far is odd
307 */
308 tmp_csum = do_csum_data((uint16_t*)tcp,
309 tcp_hdr_len);
310 csum = csum + (tmp_csum ^ 0xffff);
311 csum = (csum & 0xffff) + (csum >> 16);
312 csum = (csum & 0xffff) + (csum >> 16);
313 if (lro->len & 0x1) {
314 /* Odd number of bytes so far, flip bytes */
315 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
316 }
317 csum = csum + lro->data_csum;
318 csum = (csum & 0xffff) + (csum >> 16);
319 csum = (csum & 0xffff) + (csum >> 16);
320 lro->data_csum = csum;
321
322 lro->len += tcp_data_len;
323
324 /* adjust mbuf so that m->m_data points to
325 the first byte of the payload */
326 m_adj(m_head, hlen);
327 /* append mbuf chain */
328 lro->m_tail->m_next = m_head;
329 /* advance the last pointer */
330 lro->m_tail = m_tail;
331 /* flush packet if required */
332 device_mtu = cntl->ifp->if_mtu;
333 if (lro->len > (65535 - device_mtu)) {
334 SLIST_REMOVE(&cntl->lro_active, lro,
335 lro_entry, next);
336 tcp_lro_flush(cntl, lro);
337 }
338 return 0;
339 }
340 }
341
342 if (SLIST_EMPTY(&cntl->lro_free))
343 return -1;
344
345 /* start a new chain */
346 lro = SLIST_FIRST(&cntl->lro_free);
347 SLIST_REMOVE_HEAD(&cntl->lro_free, next);
348 SLIST_INSERT_HEAD(&cntl->lro_active, lro, next);
349 lro->source_port = tcp->th_sport;
350 lro->dest_port = tcp->th_dport;
351 lro->source_ip = ip->ip_src.s_addr;
352 lro->dest_ip = ip->ip_dst.s_addr;
353 lro->next_seq = seq + tcp_data_len;
354 lro->mss = tcp_data_len;
355 lro->ack_seq = tcp->th_ack;
356 lro->window = tcp->th_win;
357
358 /* save the checksum of just the TCP payload by
359 * subtracting off the checksum of the TCP header from
360 * the entire hardware checksum
361 * Since IP header checksum is correct, checksum over
362 * the IP header is -0. Substracting -0 is unnecessary.
363 */
364 tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len);
365 csum = csum + (tmp_csum ^ 0xffff);
366 csum = (csum & 0xffff) + (csum >> 16);
367 csum = (csum & 0xffff) + (csum >> 16);
368 lro->data_csum = csum;
369
370 lro->ip = ip;
371 /* record timestamp if it is present */
372 if (opt_bytes) {
373 lro->timestamp = 1;
374 lro->tsval = ntohl(*(ts_ptr + 1));
375 lro->tsecr = *(ts_ptr + 2);
376 }
377 lro->len = tot_len;
378 lro->m_head = m_head;
379 lro->m_tail = m_tail;
380 return 0;
381 }
Cache object: a77d100487a1e17c1ea9a1533533d3d3
|