1 /*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * from tahoe: in_cksum.c 1.2 86/01/05
34 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
35 * $FreeBSD$
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/mbuf.h>
41
42 #include <netinet/in.h>
43 #include <netinet/in_systm.h>
44 #include <netinet/ip.h>
45
46 #include <machine/in_cksum.h>
47
/*
 * Checksum routine for Internet Protocol family headers.
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 *
 * This implementation is the 386 version.
 */

/*
 * ADDCARRY(x): fold a sum that has exceeded 16 bits back into 16 bits
 * using the ones-complement end-around carry (subtracting 0xffff is
 * equivalent to subtracting 0x10000 and adding 1).
 */
#undef ADDCARRY
#define ADDCARRY(x)     if ((x) > 0xffff) (x) -= 0xffff
/*
 * REDUCE: collapse the 32-bit partial 'sum' to 16 bits plus the
 * end-around carry.  Expects a variable named 'sum' in scope.
 */
#define REDUCE          {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}

/*
 * These asm statements require __volatile because they pass information
 * via the condition codes.  GCC does not currently provide a way to specify
 * the condition codes as an input or output operand.
 *
 * The LOAD macro below is effectively a prefetch into cache.  GCC will
 * load the value into a register but will not use it.  Since modern CPUs
 * reorder operations, this will generally take place in parallel with
 * other calculations.
 */
/* ADD(n): add the 32-bit word at byte offset n from 'w' into 'sum',
 * setting the carry flag for a following ADDC/MOP. */
#define ADD(n)  __asm __volatile \
                ("addl %1, %0" : "+r" (sum) : \
                "g" (((const u_int32_t *)w)[n / 4]))
/* ADDC(n): as ADD(n), but also folds in the carry left by the
 * preceding ADD/ADDC (add-with-carry chain). */
#define ADDC(n) __asm __volatile \
                ("adcl %1, %0" : "+r" (sum) : \
                "g" (((const u_int32_t *)w)[n / 4]))
/* LOAD(n): touch the word at byte offset n so the CPU starts filling
 * that cache line; the loaded value itself is unused. */
#define LOAD(n) __asm __volatile \
                ("" : : "r" (((const u_int32_t *)w)[n / 4]))
/* MOP: "mop up" — fold the final carry flag into 'sum'. */
#define MOP     __asm __volatile \
                ("adcl $0, %0" : "+r" (sum))
81
/*
 * in_cksum(m, len):
 *	Compute the 16-bit ones-complement Internet checksum of the first
 *	'len' bytes of mbuf chain 'm' and return its complement — the
 *	value to be stored in a protocol header's checksum field.
 *
 *	Mbufs of any length (including odd) are handled: a 16-bit word
 *	split across two mbufs is reassembled via the union 'su', and
 *	data beginning at an odd address is summed by temporarily
 *	rotating the running sum by 8 bits (see 'byte_swapped').
 */
int
in_cksum(m, len)
	register struct mbuf *m;
	register int len;
{
	register u_short *w;
	register unsigned sum = 0;	/* 32-bit running partial sum */
	register int mlen = 0;		/* bytes remaining in current mbuf;
					 * -1 flags an odd trailing byte
					 * saved in su.c[0] */
	int byte_swapped = 0;		/* sum is currently rotated 8 bits */
	union { char c[2]; u_short s; } su;	/* reassembles a split word */

	for (;m && len; m = m->m_next) {
		if (m->m_len == 0)
			continue;
		w = mtod(m, u_short *);
		if (mlen == -1) {
			/*
			 * The first byte of this mbuf is the continuation
			 * of a word spanning between this mbuf and the
			 * last mbuf.
			 */

			/* su.c[0] is already saved when scanning previous
			 * mbuf.  sum was REDUCEd when we found mlen == -1
			 */
			su.c[1] = *(u_char *)w;
			sum += su.s;
			w = (u_short *)((char *)w + 1);
			mlen = m->m_len - 1;
			len--;
		} else
			mlen = m->m_len;
		if (len < mlen)
			mlen = len;
		len -= mlen;
		/*
		 * Force to long boundary so we do longword aligned
		 * memory operations
		 */
		if (3 & (int) w) {
			REDUCE;
			if ((1 & (int) w) && (mlen > 0)) {
				/*
				 * Odd start address: rotate the sum left
				 * 8 bits and stash the stray byte; the
				 * rotation is undone after this mbuf.
				 */
				sum <<= 8;
				su.c[0] = *(char *)w;
				w = (u_short *)((char *)w + 1);
				mlen--;
				byte_swapped = 1;
			}
			if ((2 & (int) w) && (mlen >= 2)) {
				sum += *w++;
				mlen -= 2;
			}
		}
		/*
		 * Advance to a 486 cache line boundary.
		 */
		if (4 & (int) w && mlen >= 4) {
			ADD(0);
			MOP;
			w += 2;
			mlen -= 4;
		}
		if (8 & (int) w && mlen >= 8) {
			ADD(0);
			ADDC(4);
			MOP;
			w += 4;
			mlen -= 8;
		}
		/*
		 * Do as much of the checksum as possible 32 bits at a time.
		 * In fact, this loop is unrolled to make overhead from
		 * branches &c small.
		 */
		mlen -= 1;
		while ((mlen -= 32) >= 0) {
			/*
			 * Add with carry 16 words and fold in the last
			 * carry by adding a 0 with carry.
			 *
			 * The early ADD(16) and the LOAD(32) are to load
			 * the next 2 cache lines in advance on 486's.  The
			 * 486 has a penalty of 2 clock cycles for loading
			 * a cache line, plus whatever time the external
			 * memory takes to load the first word(s) addressed.
			 * These penalties are unavoidable.  Subsequent
			 * accesses to a cache line being loaded (and to
			 * other external memory?) are delayed until the
			 * whole load finishes.  These penalties are mostly
			 * avoided by not accessing external memory for
			 * 8 cycles after the ADD(16) and 12 cycles after
			 * the LOAD(32).  The loop terminates when mlen
			 * is initially 33 (not 32) to guarantee that
			 * the LOAD(32) is within bounds.
			 */
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			LOAD(32);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
		}
		mlen += 32 + 1;
		if (mlen >= 32) {
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
			mlen -= 32;
		}
		if (mlen >= 16) {
			ADD(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			MOP;
			w += 8;
			mlen -= 16;
		}
		if (mlen >= 8) {
			ADD(0);
			ADDC(4);
			MOP;
			w += 4;
			mlen -= 8;
		}
		if (mlen == 0 && byte_swapped == 0)
			continue;	/* worth 1% maybe ?? */
		REDUCE;
		/* Sum any remaining full words one at a time. */
		while ((mlen -= 2) >= 0) {
			sum += *w++;
		}
		if (byte_swapped) {
			/* Undo the 8-bit rotation applied above. */
			sum <<= 8;
			byte_swapped = 0;
			if (mlen == -1) {
				su.c[1] = *(char *)w;
				sum += su.s;
				mlen = 0;
			} else
				mlen = -1;
		} else if (mlen == -1)
			/*
			 * This mbuf has odd number of bytes.
			 * There could be a word split between
			 * this mbuf and the next mbuf.
			 * Save the last byte (to prepend to next mbuf).
			 */
			su.c[0] = *(char *)w;
	}

	if (len)
		printf("%s: out of data by %d\n", __func__, len);
	if (mlen == -1) {
		/* The last mbuf has odd # of bytes.  Follow the
		   standard (the odd byte is shifted left by 8 bits) */
		su.c[1] = 0;
		sum += su.s;
	}
	REDUCE;
	return (~sum & 0xffff);
}
255
/*
 * in_cksum_skip(m, len, skip):
 *	Like in_cksum(), but ignore the first 'skip' bytes of the chain:
 *	checksum bytes [skip, len) of mbuf chain 'm' and return the
 *	complemented 16-bit ones-complement sum.
 *
 *	The first loop walks past whole mbufs covered by 'skip'; once a
 *	partially-skipped mbuf is found, control jumps into the main
 *	loop (at skip_start) with 'w' and 'mlen' set for that mbuf.
 */
u_short
in_cksum_skip(m, len, skip)
	struct mbuf *m;
	int len;
	int skip;
{
	register u_short *w;
	register unsigned sum = 0;	/* 32-bit running partial sum */
	register int mlen = 0;		/* bytes remaining in current mbuf;
					 * -1 flags an odd trailing byte
					 * saved in su.c[0] */
	int byte_swapped = 0;		/* sum is currently rotated 8 bits */
	union { char c[2]; u_short s; } su;	/* reassembles a split word */

	len -= skip;
	/* Step over the first 'skip' bytes of the chain. */
	for (; skip && m; m = m->m_next) {
		if (m->m_len > skip) {
			mlen = m->m_len - skip;
			w = (u_short *)(mtod(m, u_char *) + skip);
			goto skip_start;
		} else {
			skip -= m->m_len;
		}
	}

	for (;m && len; m = m->m_next) {
		if (m->m_len == 0)
			continue;
		w = mtod(m, u_short *);
		if (mlen == -1) {
			/*
			 * The first byte of this mbuf is the continuation
			 * of a word spanning between this mbuf and the
			 * last mbuf.
			 */

			/* su.c[0] is already saved when scanning previous
			 * mbuf.  sum was REDUCEd when we found mlen == -1
			 */
			su.c[1] = *(u_char *)w;
			sum += su.s;
			w = (u_short *)((char *)w + 1);
			mlen = m->m_len - 1;
			len--;
		} else
			mlen = m->m_len;
skip_start:
		if (len < mlen)
			mlen = len;
		len -= mlen;
		/*
		 * Force to long boundary so we do longword aligned
		 * memory operations
		 */
		if (3 & (int) w) {
			REDUCE;
			if ((1 & (int) w) && (mlen > 0)) {
				/*
				 * Odd start address: rotate the sum left
				 * 8 bits and stash the stray byte; the
				 * rotation is undone after this mbuf.
				 */
				sum <<= 8;
				su.c[0] = *(char *)w;
				w = (u_short *)((char *)w + 1);
				mlen--;
				byte_swapped = 1;
			}
			if ((2 & (int) w) && (mlen >= 2)) {
				sum += *w++;
				mlen -= 2;
			}
		}
		/*
		 * Advance to a 486 cache line boundary.
		 */
		if (4 & (int) w && mlen >= 4) {
			ADD(0);
			MOP;
			w += 2;
			mlen -= 4;
		}
		if (8 & (int) w && mlen >= 8) {
			ADD(0);
			ADDC(4);
			MOP;
			w += 4;
			mlen -= 8;
		}
		/*
		 * Do as much of the checksum as possible 32 bits at a time.
		 * In fact, this loop is unrolled to make overhead from
		 * branches &c small.
		 */
		mlen -= 1;
		while ((mlen -= 32) >= 0) {
			/*
			 * Add with carry 16 words and fold in the last
			 * carry by adding a 0 with carry.
			 *
			 * The early ADD(16) and the LOAD(32) are to load
			 * the next 2 cache lines in advance on 486's.  The
			 * 486 has a penalty of 2 clock cycles for loading
			 * a cache line, plus whatever time the external
			 * memory takes to load the first word(s) addressed.
			 * These penalties are unavoidable.  Subsequent
			 * accesses to a cache line being loaded (and to
			 * other external memory?) are delayed until the
			 * whole load finishes.  These penalties are mostly
			 * avoided by not accessing external memory for
			 * 8 cycles after the ADD(16) and 12 cycles after
			 * the LOAD(32).  The loop terminates when mlen
			 * is initially 33 (not 32) to guarantee that
			 * the LOAD(32) is within bounds.
			 */
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			LOAD(32);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
		}
		mlen += 32 + 1;
		if (mlen >= 32) {
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
			mlen -= 32;
		}
		if (mlen >= 16) {
			ADD(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			MOP;
			w += 8;
			mlen -= 16;
		}
		if (mlen >= 8) {
			ADD(0);
			ADDC(4);
			MOP;
			w += 4;
			mlen -= 8;
		}
		if (mlen == 0 && byte_swapped == 0)
			continue;	/* worth 1% maybe ?? */
		REDUCE;
		/* Sum any remaining full words one at a time. */
		while ((mlen -= 2) >= 0) {
			sum += *w++;
		}
		if (byte_swapped) {
			/* Undo the 8-bit rotation applied above. */
			sum <<= 8;
			byte_swapped = 0;
			if (mlen == -1) {
				su.c[1] = *(char *)w;
				sum += su.s;
				mlen = 0;
			} else
				mlen = -1;
		} else if (mlen == -1)
			/*
			 * This mbuf has odd number of bytes.
			 * There could be a word split between
			 * this mbuf and the next mbuf.
			 * Save the last byte (to prepend to next mbuf).
			 */
			su.c[0] = *(char *)w;
	}

	if (len)
		printf("%s: out of data by %d\n", __func__, len);
	if (mlen == -1) {
		/* The last mbuf has odd # of bytes.  Follow the
		   standard (the odd byte is shifted left by 8 bits) */
		su.c[1] = 0;
		sum += su.s;
	}
	REDUCE;
	return (~sum & 0xffff);
}
442
/*
 * This is the exact same algorithm as above with a few exceptions:
 * (1) it is designed to operate on buffers, not mbufs
 * (2) it returns an intermediate form of the sum which has to be
 *     explicitly finalized (but this can be delayed)
 * (3) it accepts an intermediate sum
 *
 * This is particularly useful when building packets quickly,
 * since one can compute the checksum of the pseudoheader ahead of
 * time and then use this function to complete the work.  That way,
 * the pseudoheader never actually has to exist in the packet buffer,
 * which avoids needless duplication of work.
 */
/*
 * in_cksum_partial(psum, w, len):
 *	Add the 'len' bytes at 'w' into the intermediate sum 'psum' and
 *	return the new intermediate sum.  The caller must eventually
 *	fold the result with in_cksum_finalize().
 */
in_psum_t
in_cksum_partial(psum, w, len)
	in_psum_t psum;
	const u_short *w;
	int len;
{
	register in_psum_t sum = psum;	/* running intermediate sum */
	int byte_swapped = 0;		/* sum is currently rotated 8 bits */
	union { char c[2]; u_short s; } su;	/* assembles the odd tail byte */

	/*
	 * Force to long boundary so we do longword aligned
	 * memory operations
	 */
	if (3 & (int) w) {
		REDUCE;
		if ((1 & (int) w) && (len > 0)) {
			/*
			 * Odd start address: rotate the sum left 8 bits
			 * and stash the stray byte; the rotation is
			 * undone below.
			 */
			sum <<= 8;
			su.c[0] = *(const char *)w;
			w = (const u_short *)((const char *)w + 1);
			len--;
			byte_swapped = 1;
		}
		if ((2 & (int) w) && (len >= 2)) {
			sum += *w++;
			len -= 2;
		}
	}
	/*
	 * Advance to a 486 cache line boundary.
	 */
	if (4 & (int) w && len >= 4) {
		ADD(0);
		MOP;
		w += 2;
		len -= 4;
	}
	if (8 & (int) w && len >= 8) {
		ADD(0);
		ADDC(4);
		MOP;
		w += 4;
		len -= 8;
	}
	/*
	 * Do as much of the checksum as possible 32 bits at a time.
	 * In fact, this loop is unrolled to make overhead from
	 * branches &c small.
	 */
	len -= 1;
	while ((len -= 32) >= 0) {
		/*
		 * Add with carry 16 words and fold in the last
		 * carry by adding a 0 with carry.
		 *
		 * The early ADD(16) and the LOAD(32) are to load
		 * the next 2 cache lines in advance on 486's.  The
		 * 486 has a penalty of 2 clock cycles for loading
		 * a cache line, plus whatever time the external
		 * memory takes to load the first word(s) addressed.
		 * These penalties are unavoidable.  Subsequent
		 * accesses to a cache line being loaded (and to
		 * other external memory?) are delayed until the
		 * whole load finishes.  These penalties are mostly
		 * avoided by not accessing external memory for
		 * 8 cycles after the ADD(16) and 12 cycles after
		 * the LOAD(32).  The loop terminates when len
		 * is initially 33 (not 32) to guarantee that
		 * the LOAD(32) is within bounds.
		 */
		ADD(16);
		ADDC(0);
		ADDC(4);
		ADDC(8);
		ADDC(12);
		LOAD(32);
		ADDC(20);
		ADDC(24);
		ADDC(28);
		MOP;
		w += 16;
	}
	len += 32 + 1;
	if (len >= 32) {
		ADD(16);
		ADDC(0);
		ADDC(4);
		ADDC(8);
		ADDC(12);
		ADDC(20);
		ADDC(24);
		ADDC(28);
		MOP;
		w += 16;
		len -= 32;
	}
	if (len >= 16) {
		ADD(0);
		ADDC(4);
		ADDC(8);
		ADDC(12);
		MOP;
		w += 8;
		len -= 16;
	}
	if (len >= 8) {
		ADD(0);
		ADDC(4);
		MOP;
		w += 4;
		len -= 8;
	}
	if (len == 0 && byte_swapped == 0)
		goto out;
	REDUCE;
	/* Sum any remaining full words one at a time. */
	while ((len -= 2) >= 0) {
		sum += *w++;
	}
	if (byte_swapped) {
		/* Undo the 8-bit rotation applied above. */
		sum <<= 8;
		byte_swapped = 0;
		if (len == -1) {
			su.c[1] = *(const char *)w;
			sum += su.s;
			len = 0;
		} else
			len = -1;
	} else if (len == -1) {
		/*
		 * This buffer has odd number of bytes.
		 * There could be a word split between
		 * this buffer and the next.
		 */
		su.c[0] = *(const char *)w;
	}
out:
	if (len == -1) {
		/* The last buffer has odd # of bytes.  Follow the
		   standard (the odd byte is shifted left by 8 bits) */
		su.c[1] = 0;
		sum += su.s;
	}
	return sum;
}
600
601 int
602 in_cksum_finalize(psum)
603 in_psum_t psum;
604 {
605 in_psum_t sum = psum;
606 REDUCE;
607 return (~sum & 0xffff);
608 }
Cache object: 565398b9e4a4fa609008282236cfcfc2
|