1 /******************************************************************************
2
3 Copyright (c) 2007-2008, Myricom Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: releng/9.0/sys/dev/mxge/mxge_lro.c 220424 2011-04-07 13:49:44Z gallatin $");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/endian.h>
36 #include <sys/mbuf.h>
37 #include <sys/kernel.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/bus.h>
41
42 #include <net/if.h>
43 #include <net/ethernet.h>
44 #include <net/if_media.h>
45
46 #include <netinet/in_systm.h>
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/tcp.h>
50
51 #include <machine/bus.h>
52 #include <machine/in_cksum.h>
53
54 #include <dev/mxge/mxge_mcp.h>
55 #include <dev/mxge/if_mxge_var.h>
56
57 #include "opt_inet.h"
58
59 #ifdef INET
60
/*
 * Sum the 16-bit words of a buffer and fold the carries back in so the
 * result fits in 16 bits.  The result is NOT complemented; callers xor
 * it with 0xffff themselves when they need to.
 *
 * len is a byte count and must be a multiple of 4: the loop always
 * consumes two 16-bit words per iteration.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t sum = 0;
	int remaining;

	for (remaining = len; remaining > 0; remaining -= 4) {
		sum += raw[0];
		sum += raw[1];
		raw += 2;
	}

	/* two folds: the first one may itself produce a carry */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}
78
79
80 void
81 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
82 {
83 mxge_softc_t *mgp = ss->sc;
84 struct ifnet *ifp;
85 struct ip *ip;
86 struct tcphdr *tcp;
87 uint32_t *ts_ptr;
88 uint32_t tcplen, tcp_csum;
89
90 if (lro->append_cnt) {
91 /* incorporate the new len into the ip header and
92 * re-calculate the checksum */
93 ip = lro->ip;
94 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
95 ip->ip_sum = 0;
96 ip->ip_sum = 0xffff ^
97 mxge_csum_generic((uint16_t*)ip,
98 sizeof (*ip));
99
100 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
101 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
102 lro->m_head->m_pkthdr.csum_data = 0xffff;
103 lro->m_head->m_pkthdr.len = lro->len;
104
105 /* incorporate the latest ack into the tcp header */
106 tcp = (struct tcphdr *) (ip + 1);
107 tcp->th_ack = lro->ack_seq;
108 tcp->th_win = lro->window;
109 /* incorporate latest timestamp into the tcp header */
110 if (lro->timestamp) {
111 ts_ptr = (uint32_t *)(tcp + 1);
112 ts_ptr[1] = htonl(lro->tsval);
113 ts_ptr[2] = lro->tsecr;
114 }
115 /*
116 * update checksum in tcp header by re-calculating the
117 * tcp pseudoheader checksum, and adding it to the checksum
118 * of the tcp payload data
119 */
120 tcp->th_sum = 0;
121 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
122 tcp_csum = lro->data_csum;
123 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
124 htons(tcplen + IPPROTO_TCP));
125 tcp_csum += mxge_csum_generic((uint16_t*)tcp,
126 tcp->th_off << 2);
127 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
128 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
129 #if 0
130 IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n",
131 in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
132 htons(tcplen + IPPROTO_TCP)),
133 mxge_csum_generic((uint16_t*)tcp,
134 tcp->th_off << 2),
135 htons(0xffff ^ tcp_csum));
136 #endif
137 tcp->th_sum = 0xffff ^ tcp_csum;
138 }
139 ifp = mgp->ifp;
140 (*ifp->if_input)(mgp->ifp, lro->m_head);
141 ss->lro_queued += lro->append_cnt + 1;
142 ss->lro_flushed++;
143 lro->m_head = NULL;
144 lro->timestamp = 0;
145 lro->append_cnt = 0;
146 SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
147 }
148
149 int
150 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
151 {
152 struct ether_header *eh;
153 struct ip *ip;
154 struct tcphdr *tcp;
155 uint32_t *ts_ptr;
156 struct mbuf *m_nxt, *m_tail;
157 struct lro_entry *lro;
158 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
159 int opt_bytes, trim;
160 uint32_t seq, tmp_csum, device_mtu;
161
162 eh = mtod(m_head, struct ether_header *);
163 if (eh->ether_type != htons(ETHERTYPE_IP))
164 return 1;
165 ip = (struct ip *) (eh + 1);
166 if (ip->ip_p != IPPROTO_TCP)
167 return 1;
168
169 /* ensure there are no options */
170 if ((ip->ip_hl << 2) != sizeof (*ip))
171 return -1;
172
173 /* .. and the packet is not fragmented */
174 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
175 return -1;
176
177 /* verify that the IP header checksum is correct */
178 tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
179 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
180 ss->lro_bad_csum++;
181 return -1;
182 }
183
184 /* find the TCP header */
185 tcp = (struct tcphdr *) (ip + 1);
186
187 /* ensure no bits set besides ack or psh */
188 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
189 return -1;
190
191 /* check for timestamps. Since the only option we handle are
192 timestamps, we only have to handle the simple case of
193 aligned timestamps */
194
195 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
196 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
197 ts_ptr = (uint32_t *)(tcp + 1);
198 if (opt_bytes != 0) {
199 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
200 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
201 return -1;
202 }
203
204 ip_len = ntohs(ip->ip_len);
205 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
206
207
208 /*
209 * If frame is padded beyond the end of the IP packet,
210 * then we must trim the extra bytes off the end.
211 */
212 tot_len = m_head->m_pkthdr.len;
213 trim = tot_len - (ip_len + ETHER_HDR_LEN);
214 if (trim != 0) {
215 if (trim < 0) {
216 /* truncated packet */
217 return -1;
218 }
219 m_adj(m_head, -trim);
220 tot_len = m_head->m_pkthdr.len;
221 }
222
223 m_nxt = m_head;
224 m_tail = NULL; /* -Wuninitialized */
225 while (m_nxt != NULL) {
226 m_tail = m_nxt;
227 m_nxt = m_tail->m_next;
228 }
229
230 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
231 seq = ntohl(tcp->th_seq);
232
233 SLIST_FOREACH(lro, &ss->lro_active, next) {
234 if (lro->source_port == tcp->th_sport &&
235 lro->dest_port == tcp->th_dport &&
236 lro->source_ip == ip->ip_src.s_addr &&
237 lro->dest_ip == ip->ip_dst.s_addr) {
238 /* Try to append it */
239
240 if (__predict_false(seq != lro->next_seq ||
241 (tcp_data_len == 0 &&
242 lro->ack_seq == tcp->th_ack))) {
243 /* out of order packet or dup ack */
244 SLIST_REMOVE(&ss->lro_active, lro,
245 lro_entry, next);
246 mxge_lro_flush(ss, lro);
247 return -1;
248 }
249
250 if (opt_bytes) {
251 uint32_t tsval = ntohl(*(ts_ptr + 1));
252 /* make sure timestamp values are increasing */
253 if (__predict_false(lro->tsval > tsval ||
254 *(ts_ptr + 2) == 0)) {
255 return -1;
256 }
257 lro->tsval = tsval;
258 lro->tsecr = *(ts_ptr + 2);
259 }
260
261 lro->next_seq += tcp_data_len;
262 lro->ack_seq = tcp->th_ack;
263 lro->window = tcp->th_win;
264 lro->append_cnt++;
265 if (tcp_data_len == 0) {
266 m_freem(m_head);
267 return 0;
268 }
269 /* subtract off the checksum of the tcp header
270 * from the hardware checksum, and add it to the
271 * stored tcp data checksum. Byteswap the checksum
272 * if the total length so far is odd
273 */
274 tmp_csum = mxge_csum_generic((uint16_t*)tcp,
275 tcp_hdr_len);
276 csum = csum + (tmp_csum ^ 0xffff);
277 csum = (csum & 0xffff) + (csum >> 16);
278 csum = (csum & 0xffff) + (csum >> 16);
279 if (lro->len & 0x1) {
280 /* Odd number of bytes so far, flip bytes */
281 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
282 }
283 csum = csum + lro->data_csum;
284 csum = (csum & 0xffff) + (csum >> 16);
285 csum = (csum & 0xffff) + (csum >> 16);
286 lro->data_csum = csum;
287
288 lro->len += tcp_data_len;
289
290 /* adjust mbuf so that m->m_data points to
291 the first byte of the payload */
292 m_adj(m_head, hlen);
293 /* append mbuf chain */
294 lro->m_tail->m_next = m_head;
295 /* advance the last pointer */
296 lro->m_tail = m_tail;
297 /* flush packet if required */
298 device_mtu = ss->sc->ifp->if_mtu;
299 if (lro->len > (65535 - device_mtu)) {
300 SLIST_REMOVE(&ss->lro_active, lro,
301 lro_entry, next);
302 mxge_lro_flush(ss, lro);
303 }
304 return 0;
305 }
306 }
307
308 if (SLIST_EMPTY(&ss->lro_free))
309 return -1;
310
311 /* start a new chain */
312 lro = SLIST_FIRST(&ss->lro_free);
313 SLIST_REMOVE_HEAD(&ss->lro_free, next);
314 SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
315 lro->source_port = tcp->th_sport;
316 lro->dest_port = tcp->th_dport;
317 lro->source_ip = ip->ip_src.s_addr;
318 lro->dest_ip = ip->ip_dst.s_addr;
319 lro->next_seq = seq + tcp_data_len;
320 lro->mss = tcp_data_len;
321 lro->ack_seq = tcp->th_ack;
322 lro->window = tcp->th_win;
323
324 /* save the checksum of just the TCP payload by
325 * subtracting off the checksum of the TCP header from
326 * the entire hardware checksum
327 * Since IP header checksum is correct, checksum over
328 * the IP header is -0. Substracting -0 is unnecessary.
329 */
330 tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
331 csum = csum + (tmp_csum ^ 0xffff);
332 csum = (csum & 0xffff) + (csum >> 16);
333 csum = (csum & 0xffff) + (csum >> 16);
334 lro->data_csum = csum;
335
336 lro->ip = ip;
337 /* record timestamp if it is present */
338 if (opt_bytes) {
339 lro->timestamp = 1;
340 lro->tsval = ntohl(*(ts_ptr + 1));
341 lro->tsecr = *(ts_ptr + 2);
342 }
343 lro->len = tot_len;
344 lro->m_head = m_head;
345 lro->m_tail = m_tail;
346 return 0;
347 }
348
349 #endif /* INET */
350 /*
351 This file uses Myri10GE driver indentation.
352
353 Local Variables:
354 c-file-style:"linux"
355 tab-width:8
356 End:
357 */
Cache object: 83269bc9b57fec0eb0de557e0687d4df
|