1 /*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * from tahoe: in_cksum.c 1.2 86/01/05
34 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
35 * $FreeBSD$
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/mbuf.h>
41
42 #include <netinet/in.h>
43 #include <netinet/in_systm.h>
44 #include <netinet/ip.h>
45
46 #include <machine/in_cksum.h>
47
/*
 * Checksum routine for Internet Protocol family headers.
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 *
 * This implementation is the 386 version.
 */

/*
 * ADDCARRY(x): fold a sum that has exceeded 16 bits back into 16 bits
 * using the ones-complement end-around carry (subtracting 0xffff is
 * equivalent to subtracting 0x10000 and adding 1).
 */
#undef ADDCARRY
#define ADDCARRY(x)     if ((x) > 0xffff) (x) -= 0xffff
/*
 * REDUCE: collapse the 32-bit partial 'sum' to 16 bits plus the
 * end-around carry.  Expects a variable named 'sum' in scope.
 */
#define REDUCE          {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}

/*
 * These asm statements require __volatile because they pass information
 * via the condition codes.  GCC does not currently provide a way to specify
 * the condition codes as an input or output operand.
 *
 * The LOAD macro below is effectively a prefetch into cache.  GCC will
 * load the value into a register but will not use it.  Since modern CPUs
 * reorder operations, this will generally take place in parallel with
 * other calculations.
 */
/* ADD(n): add the 32-bit word at byte offset n from 'w' into 'sum',
 * setting the carry flag for a following ADDC/MOP. */
#define ADD(n)  __asm __volatile \
                ("addl %1, %0" : "+r" (sum) : \
                "g" (((const u_int32_t *)w)[n / 4]))
/* ADDC(n): as ADD(n), but also folds in the carry left by the
 * preceding ADD/ADDC (add-with-carry chain). */
#define ADDC(n) __asm __volatile \
                ("adcl %1, %0" : "+r" (sum) : \
                "g" (((const u_int32_t *)w)[n / 4]))
/* LOAD(n): touch the word at byte offset n so the CPU starts filling
 * that cache line; the loaded value itself is unused. */
#define LOAD(n) __asm __volatile \
                ("" : : "r" (((const u_int32_t *)w)[n / 4]))
/* MOP: "mop up" — fold the final carry flag into 'sum'. */
#define MOP     __asm __volatile \
                ("adcl $0, %0" : "+r" (sum))
81
/*
 * in_cksum(m, len):
 *	Compute the 16-bit ones-complement Internet checksum of the first
 *	'len' bytes of mbuf chain 'm' and return its complement — the
 *	value to be stored in a protocol header's checksum field.
 *
 *	Mbufs of any length (including odd) are handled: a 16-bit word
 *	split across two mbufs is reassembled via the union 'su', and
 *	data beginning at an odd address is summed by temporarily
 *	rotating the running sum by 8 bits (see 'byte_swapped').
 */
int
in_cksum(m, len)
	register struct mbuf *m;
	register int len;
{
	register u_short *w;
	register unsigned sum = 0;	/* 32-bit running partial sum */
	register int mlen = 0;		/* bytes remaining in current mbuf;
					 * -1 flags an odd trailing byte
					 * saved in su.c[0] */
	int byte_swapped = 0;		/* sum is currently rotated 8 bits */
	union { char c[2]; u_short s; } su;	/* reassembles a split word */

	for (;m && len; m = m->m_next) {
		if (m->m_len == 0)
			continue;
		w = mtod(m, u_short *);
		if (mlen == -1) {
			/*
			 * The first byte of this mbuf is the continuation
			 * of a word spanning between this mbuf and the
			 * last mbuf.
			 */

			/* su.c[0] is already saved when scanning previous
			 * mbuf.  sum was REDUCEd when we found mlen == -1
			 */
			su.c[1] = *(u_char *)w;
			sum += su.s;
			w = (u_short *)((char *)w + 1);
			mlen = m->m_len - 1;
			len--;
		} else
			mlen = m->m_len;
		if (len < mlen)
			mlen = len;
		len -= mlen;
		/*
		 * Force to long boundary so we do longword aligned
		 * memory operations
		 */
		if (3 & (int) w) {
			REDUCE;
			if ((1 & (int) w) && (mlen > 0)) {
				/*
				 * Odd start address: rotate the sum left
				 * 8 bits and stash the stray byte; the
				 * rotation is undone after this mbuf.
				 */
				sum <<= 8;
				su.c[0] = *(char *)w;
				w = (u_short *)((char *)w + 1);
				mlen--;
				byte_swapped = 1;
			}
			if ((2 & (int) w) && (mlen >= 2)) {
				sum += *w++;
				mlen -= 2;
			}
		}
		/*
		 * Advance to a 486 cache line boundary.
		 */
		if (4 & (int) w && mlen >= 4) {
			ADD(0);
			MOP;
			w += 2;
			mlen -= 4;
		}
		if (8 & (int) w && mlen >= 8) {
			ADD(0);
			ADDC(4);
			MOP;
			w += 4;
			mlen -= 8;
		}
		/*
		 * Do as much of the checksum as possible 32 bits at a time.
		 * In fact, this loop is unrolled to make overhead from
		 * branches &c small.
		 */
		mlen -= 1;
		while ((mlen -= 32) >= 0) {
			/*
			 * Add with carry 16 words and fold in the last
			 * carry by adding a 0 with carry.
			 *
			 * The early ADD(16) and the LOAD(32) are to load
			 * the next 2 cache lines in advance on 486's.  The
			 * 486 has a penalty of 2 clock cycles for loading
			 * a cache line, plus whatever time the external
			 * memory takes to load the first word(s) addressed.
			 * These penalties are unavoidable.  Subsequent
			 * accesses to a cache line being loaded (and to
			 * other external memory?) are delayed until the
			 * whole load finishes.  These penalties are mostly
			 * avoided by not accessing external memory for
			 * 8 cycles after the ADD(16) and 12 cycles after
			 * the LOAD(32).  The loop terminates when mlen
			 * is initially 33 (not 32) to guarantee that
			 * the LOAD(32) is within bounds.
			 */
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			LOAD(32);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
		}
		mlen += 32 + 1;
		if (mlen >= 32) {
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
			mlen -= 32;
		}
		if (mlen >= 16) {
			ADD(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			MOP;
			w += 8;
			mlen -= 16;
		}
		if (mlen >= 8) {
			ADD(0);
			ADDC(4);
			MOP;
			w += 4;
			mlen -= 8;
		}
		if (mlen == 0 && byte_swapped == 0)
			continue;	/* worth 1% maybe ?? */
		REDUCE;
		/* Sum any remaining full words one at a time. */
		while ((mlen -= 2) >= 0) {
			sum += *w++;
		}
		if (byte_swapped) {
			/* Undo the 8-bit rotation applied above. */
			sum <<= 8;
			byte_swapped = 0;
			if (mlen == -1) {
				su.c[1] = *(char *)w;
				sum += su.s;
				mlen = 0;
			} else
				mlen = -1;
		} else if (mlen == -1)
			/*
			 * This mbuf has odd number of bytes.
			 * There could be a word split between
			 * this mbuf and the next mbuf.
			 * Save the last byte (to prepend to next mbuf).
			 */
			su.c[0] = *(char *)w;
	}

	if (len)
		printf("%s: out of data by %d\n", __func__, len);
	if (mlen == -1) {
		/* The last mbuf has odd # of bytes.  Follow the
		   standard (the odd byte is shifted left by 8 bits) */
		su.c[1] = 0;
		sum += su.s;
	}
	REDUCE;
	return (~sum & 0xffff);
}
255
/*
 * in_cksum_skip(m, len, skip):
 *	Like in_cksum(), but ignore the first 'skip' bytes of the chain:
 *	checksum bytes [skip, len) of mbuf chain 'm' and return the
 *	complemented 16-bit ones-complement sum.
 *
 *	The first loop walks past whole mbufs covered by 'skip'; once a
 *	partially-skipped mbuf is found, control jumps into the main
 *	loop (at skip_start) with 'w' and 'mlen' set for that mbuf.
 */
u_short
in_cksum_skip(m, len, skip)
	struct mbuf *m;
	int len;
	int skip;
{
	register u_short *w;
	register unsigned sum = 0;	/* 32-bit running partial sum */
	register int mlen = 0;		/* bytes remaining in current mbuf;
					 * -1 flags an odd trailing byte
					 * saved in su.c[0] */
	int byte_swapped = 0;		/* sum is currently rotated 8 bits */
	union { char c[2]; u_short s; } su;	/* reassembles a split word */

	len -= skip;
	/* Step over the first 'skip' bytes of the chain. */
	for (; skip && m; m = m->m_next) {
		if (m->m_len > skip) {
			mlen = m->m_len - skip;
			w = (u_short *)(mtod(m, u_char *) + skip);
			goto skip_start;
		} else {
			skip -= m->m_len;
		}
	}

	for (;m && len; m = m->m_next) {
		if (m->m_len == 0)
			continue;
		w = mtod(m, u_short *);
		if (mlen == -1) {
			/*
			 * The first byte of this mbuf is the continuation
			 * of a word spanning between this mbuf and the
			 * last mbuf.
			 */

			/* su.c[0] is already saved when scanning previous
			 * mbuf.  sum was REDUCEd when we found mlen == -1
			 */
			su.c[1] = *(u_char *)w;
			sum += su.s;
			w = (u_short *)((char *)w + 1);
			mlen = m->m_len - 1;
			len--;
		} else
			mlen = m->m_len;
skip_start:
		if (len < mlen)
			mlen = len;
		len -= mlen;
		/*
		 * Force to long boundary so we do longword aligned
		 * memory operations
		 */
		if (3 & (int) w) {
			REDUCE;
			if ((1 & (int) w) && (mlen > 0)) {
				/*
				 * Odd start address: rotate the sum left
				 * 8 bits and stash the stray byte; the
				 * rotation is undone after this mbuf.
				 */
				sum <<= 8;
				su.c[0] = *(char *)w;
				w = (u_short *)((char *)w + 1);
				mlen--;
				byte_swapped = 1;
			}
			if ((2 & (int) w) && (mlen >= 2)) {
				sum += *w++;
				mlen -= 2;
			}
		}
		/*
		 * Advance to a 486 cache line boundary.
		 */
		if (4 & (int) w && mlen >= 4) {
			ADD(0);
			MOP;
			w += 2;
			mlen -= 4;
		}
		if (8 & (int) w && mlen >= 8) {
			ADD(0);
			ADDC(4);
			MOP;
			w += 4;
			mlen -= 8;
		}
		/*
		 * Do as much of the checksum as possible 32 bits at a time.
		 * In fact, this loop is unrolled to make overhead from
		 * branches &c small.
		 */
		mlen -= 1;
		while ((mlen -= 32) >= 0) {
			/*
			 * Add with carry 16 words and fold in the last
			 * carry by adding a 0 with carry.
			 *
			 * The early ADD(16) and the LOAD(32) are to load
			 * the next 2 cache lines in advance on 486's.  The
			 * 486 has a penalty of 2 clock cycles for loading
			 * a cache line, plus whatever time the external
			 * memory takes to load the first word(s) addressed.
			 * These penalties are unavoidable.  Subsequent
			 * accesses to a cache line being loaded (and to
			 * other external memory?) are delayed until the
			 * whole load finishes.  These penalties are mostly
			 * avoided by not accessing external memory for
			 * 8 cycles after the ADD(16) and 12 cycles after
			 * the LOAD(32).  The loop terminates when mlen
			 * is initially 33 (not 32) to guarantee that
			 * the LOAD(32) is within bounds.
			 */
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			LOAD(32);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
		}
		mlen += 32 + 1;
		if (mlen >= 32) {
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
			mlen -= 32;
		}
		if (mlen >= 16) {
			ADD(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			MOP;
			w += 8;
			mlen -= 16;
		}
		if (mlen >= 8) {
			ADD(0);
			ADDC(4);
			MOP;
			w += 4;
			mlen -= 8;
		}
		if (mlen == 0 && byte_swapped == 0)
			continue;	/* worth 1% maybe ?? */
		REDUCE;
		/* Sum any remaining full words one at a time. */
		while ((mlen -= 2) >= 0) {
			sum += *w++;
		}
		if (byte_swapped) {
			/* Undo the 8-bit rotation applied above. */
			sum <<= 8;
			byte_swapped = 0;
			if (mlen == -1) {
				su.c[1] = *(char *)w;
				sum += su.s;
				mlen = 0;
			} else
				mlen = -1;
		} else if (mlen == -1)
			/*
			 * This mbuf has odd number of bytes.
			 * There could be a word split between
			 * this mbuf and the next mbuf.
			 * Save the last byte (to prepend to next mbuf).
			 */
			su.c[0] = *(char *)w;
	}

	if (len)
		printf("%s: out of data by %d\n", __func__, len);
	if (mlen == -1) {
		/* The last mbuf has odd # of bytes.  Follow the
		   standard (the odd byte is shifted left by 8 bits) */
		su.c[1] = 0;
		sum += su.s;
	}
	REDUCE;
	return (~sum & 0xffff);
}
442
/*
 * This is the exact same algorithm as above with a few exceptions:
 * (1) it is designed to operate on buffers, not mbufs
 * (2) it returns an intermediate form of the sum which has to be
 *     explicitly finalized (but this can be delayed)
 * (3) it accepts an intermediate sum
 *
 * This is particularly useful when building packets quickly,
 * since one can compute the checksum of the pseudoheader ahead of
 * time and then use this function to complete the work.  That way,
 * the pseudoheader never actually has to exist in the packet buffer,
 * which avoids needless duplication of work.
 */
/*
 * in_cksum_partial(psum, w, len):
 *	Add the 'len' bytes at 'w' into the intermediate sum 'psum' and
 *	return the new intermediate sum.  The caller must eventually
 *	fold the result with in_cksum_finalize().
 */
in_psum_t
in_cksum_partial(psum, w, len)
	in_psum_t psum;
	const u_short *w;
	int len;
{
	register in_psum_t sum = psum;	/* running intermediate sum */
	int byte_swapped = 0;		/* sum is currently rotated 8 bits */
	union { char c[2]; u_short s; } su;	/* assembles the odd tail byte */

	/*
	 * Force to long boundary so we do longword aligned
	 * memory operations
	 */
	if (3 & (int) w) {
		REDUCE;
		if ((1 & (int) w) && (len > 0)) {
			/*
			 * Odd start address: rotate the sum left 8 bits
			 * and stash the stray byte; the rotation is
			 * undone below.
			 */
			sum <<= 8;
			su.c[0] = *(const char *)w;
			w = (const u_short *)((const char *)w + 1);
			len--;
			byte_swapped = 1;
		}
		if ((2 & (int) w) && (len >= 2)) {
			sum += *w++;
			len -= 2;
		}
	}
	/*
	 * Advance to a 486 cache line boundary.
	 */
	if (4 & (int) w && len >= 4) {
		ADD(0);
		MOP;
		w += 2;
		len -= 4;
	}
	if (8 & (int) w && len >= 8) {
		ADD(0);
		ADDC(4);
		MOP;
		w += 4;
		len -= 8;
	}
	/*
	 * Do as much of the checksum as possible 32 bits at a time.
	 * In fact, this loop is unrolled to make overhead from
	 * branches &c small.
	 */
	len -= 1;
	while ((len -= 32) >= 0) {
		/*
		 * Add with carry 16 words and fold in the last
		 * carry by adding a 0 with carry.
		 *
		 * The early ADD(16) and the LOAD(32) are to load
		 * the next 2 cache lines in advance on 486's.  The
		 * 486 has a penalty of 2 clock cycles for loading
		 * a cache line, plus whatever time the external
		 * memory takes to load the first word(s) addressed.
		 * These penalties are unavoidable.  Subsequent
		 * accesses to a cache line being loaded (and to
		 * other external memory?) are delayed until the
		 * whole load finishes.  These penalties are mostly
		 * avoided by not accessing external memory for
		 * 8 cycles after the ADD(16) and 12 cycles after
		 * the LOAD(32).  The loop terminates when len
		 * is initially 33 (not 32) to guarantee that
		 * the LOAD(32) is within bounds.
		 */
		ADD(16);
		ADDC(0);
		ADDC(4);
		ADDC(8);
		ADDC(12);
		LOAD(32);
		ADDC(20);
		ADDC(24);
		ADDC(28);
		MOP;
		w += 16;
	}
	len += 32 + 1;
	if (len >= 32) {
		ADD(16);
		ADDC(0);
		ADDC(4);
		ADDC(8);
		ADDC(12);
		ADDC(20);
		ADDC(24);
		ADDC(28);
		MOP;
		w += 16;
		len -= 32;
	}
	if (len >= 16) {
		ADD(0);
		ADDC(4);
		ADDC(8);
		ADDC(12);
		MOP;
		w += 8;
		len -= 16;
	}
	if (len >= 8) {
		ADD(0);
		ADDC(4);
		MOP;
		w += 4;
		len -= 8;
	}
	if (len == 0 && byte_swapped == 0)
		goto out;
	REDUCE;
	/* Sum any remaining full words one at a time. */
	while ((len -= 2) >= 0) {
		sum += *w++;
	}
	if (byte_swapped) {
		/* Undo the 8-bit rotation applied above. */
		sum <<= 8;
		byte_swapped = 0;
		if (len == -1) {
			su.c[1] = *(const char *)w;
			sum += su.s;
			len = 0;
		} else
			len = -1;
	} else if (len == -1) {
		/*
		 * This buffer has odd number of bytes.
		 * There could be a word split between
		 * this buffer and the next.
		 */
		su.c[0] = *(const char *)w;
	}
out:
	if (len == -1) {
		/* The last buffer has odd # of bytes.  Follow the
		   standard (the odd byte is shifted left by 8 bits) */
		su.c[1] = 0;
		sum += su.s;
	}
	return sum;
}
600
601 int
602 in_cksum_finalize(psum)
603 in_psum_t psum;
604 {
605 in_psum_t sum = psum;
606 REDUCE;
607 return (~sum & 0xffff);
608 }
Cache object: 565398b9e4a4fa609008282236cfcfc2
|