The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/cpu_in_cksum.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: cpu_in_cksum.c,v 1.1 2008/01/25 21:12:14 joerg Exp $   */
    2 /*-
    3  * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  *
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in
   14  *    the documentation and/or other materials provided with the
   15  *    distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   18  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   20  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
   21  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   22  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
   23  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   25  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   26  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   27  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __KERNEL_RCSID(0, "$NetBSD: cpu_in_cksum.c,v 1.1 2008/01/25 21:12:14 joerg Exp $");
   33 
   34 #include <sys/param.h>
   35 #include <sys/endian.h>
   36 #include <sys/mbuf.h>
   37 #ifdef _KERNEL
   38 #include <sys/systm.h>
   39 #else
   40 #include <assert.h>
   41 #include <stdbool.h>
   42 #include <stdio.h>
   43 
   44 #define KASSERT(x) assert(x)
   45 #endif
   46 
   47 #include <machine/limits.h>
   48 
   49 #include <netinet/in.h>
   50 
   51 #ifndef _KERNEL
   52 int     cpu_in_cksum(struct mbuf*, int, int, uint32_t);
   53 #endif
   54 
   55 /*
   56  * Checksum routine for Internet Protocol family headers (Portable Version).
   57  *
   58  * This routine is very heavily used in the network
   59  * code and should be modified for each CPU to be as fast as possible.
   60  *
   61  * A discussion of different implementation techniques can be found in
   62  * RFC 1071.
   63  *
   64  * The default implementation for 32bit architectures is using
   65  * a 32bit accumulator and operating on 16bit operands.
   66  *
   67  * The default implementation for 64bit architectures is using
   68  * a 64bit accumulator and operating on 32bit operands.
   69  *
   70  * Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core
   71  * of the inner loop. After each iteration of the inner loop, a partial
   72  * reduction is done to avoid carry in long packets.
   73  */
   74 
   75 #if ULONG_MAX == 0xffffffffUL
   76 /* 32bit version */
int
cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
        int mlen;
        uint32_t sum, partial;
        unsigned int final_acc;
        uint8_t *data;
        bool needs_swap, started_on_odd;

        /*
         * 32bit flavour: 32bit accumulator operating on 16bit loads.
         *
         * m:           mbuf chain containing the bytes to checksum
         * len:         number of bytes to checksum
         * off:         byte offset into the chain where checksumming starts
         * initial_sum: caller-supplied seed (e.g. pseudo-header partial sum)
         *
         * Returns the ones-complement Internet checksum in the low 16 bits,
         * or -1 if the mbuf chain is shorter than off + len bytes.
         *
         * started_on_odd tracks whether the next byte to be consumed sits at
         * an odd offset within the overall byte stream; when a segment starts
         * on an odd stream offset, its 16bit words are byte-swapped relative
         * to the packet, which is compensated by rotating 'partial' by 8 bits
         * before folding (needs_swap).
         */
        KASSERT(len >= 0);
        KASSERT(off >= 0);

        needs_swap = false;
        started_on_odd = false;
        /* Pre-fold the seed so later additions have carry headroom. */
        sum = (initial_sum >> 16) + (initial_sum & 0xffff);

        /* Consume the initial offset; locate the first byte to sum. */
        for (;;) {
                if (__predict_false(m == NULL)) {
                        printf("in_cksum: out of data\n");
                        return -1;
                }
                mlen = m->m_len;
                if (mlen > off) {
                        /* Offset lands inside this mbuf; enter main loop. */
                        mlen -= off;
                        data = mtod(m, uint8_t *) + off;
                        goto post_initial_offset;
                }
                off -= mlen;
                if (len == 0)
                        break;
                m = m->m_next;
        }

        /* Main loop: one iteration per mbuf until len bytes are summed. */
        for (; len > 0; m = m->m_next) {
                if (__predict_false(m == NULL)) {
                        printf("in_cksum: out of data\n");
                        return -1;
                }
                mlen = m->m_len;
                data = mtod(m, uint8_t *);
 post_initial_offset:
                if (mlen == 0)
                        continue;
                if (mlen > len)
                        mlen = len;
                len -= mlen;

                partial = 0;
                if ((uintptr_t)data & 1) {
                        /* Align on word boundary */
                        started_on_odd = !started_on_odd;
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        partial = *data << 8;
#else
                        partial = *data;
#endif
                        ++data;
                        --mlen;
                }
                /* Latch swap decision for this mbuf's word-aligned run. */
                needs_swap = started_on_odd;
                /* Unrolled core: 16 x 16bit loads per iteration. */
                while (mlen >= 32) {
                        __builtin_prefetch(data + 32);
                        partial += *(uint16_t *)data;
                        partial += *(uint16_t *)(data + 2);
                        partial += *(uint16_t *)(data + 4);
                        partial += *(uint16_t *)(data + 6);
                        partial += *(uint16_t *)(data + 8);
                        partial += *(uint16_t *)(data + 10);
                        partial += *(uint16_t *)(data + 12);
                        partial += *(uint16_t *)(data + 14);
                        partial += *(uint16_t *)(data + 16);
                        partial += *(uint16_t *)(data + 18);
                        partial += *(uint16_t *)(data + 20);
                        partial += *(uint16_t *)(data + 22);
                        partial += *(uint16_t *)(data + 24);
                        partial += *(uint16_t *)(data + 26);
                        partial += *(uint16_t *)(data + 28);
                        partial += *(uint16_t *)(data + 30);
                        data += 32;
                        mlen -= 32;
                        /*
                         * Fold early once the top two bits are set, so
                         * neither the byte rotation below nor further
                         * additions can overflow the 32bit accumulator.
                         */
                        if (__predict_false(partial & 0xc0000000)) {
                                if (needs_swap)
                                        partial = (partial << 8) + (partial >> 24);
                                sum += (partial >> 16);
                                sum += (partial & 0xffff);
                                partial = 0;
                        }
                }
                if (mlen & 16) {
                        partial += *(uint16_t *)data;
                        partial += *(uint16_t *)(data + 2);
                        partial += *(uint16_t *)(data + 4);
                        partial += *(uint16_t *)(data + 6);
                        partial += *(uint16_t *)(data + 8);
                        partial += *(uint16_t *)(data + 10);
                        partial += *(uint16_t *)(data + 12);
                        partial += *(uint16_t *)(data + 14);
                        data += 16;
                        mlen -= 16;
                }
                /*
                 * mlen is not updated below as the remaining tests
                 * are using bit masks, which are not affected.
                 */
                if (mlen & 8) {
                        partial += *(uint16_t *)data;
                        partial += *(uint16_t *)(data + 2);
                        partial += *(uint16_t *)(data + 4);
                        partial += *(uint16_t *)(data + 6);
                        data += 8;
                }
                if (mlen & 4) {
                        partial += *(uint16_t *)data;
                        partial += *(uint16_t *)(data + 2);
                        data += 4;
                }
                if (mlen & 2) {
                        partial += *(uint16_t *)data;
                        data += 2;
                }
                if (mlen & 1) {
                        /* Trailing odd byte: next segment starts odd. */
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        partial += *data;
#else
                        partial += *data << 8;
#endif
                        started_on_odd = !started_on_odd;
                }

                /* Rotate by 8 to undo the odd-offset byte swap. */
                if (needs_swap)
                        partial = (partial << 8) + (partial >> 24);
                sum += (partial >> 16) + (partial & 0xffff);
                /*
                 * Reduce sum to allow potential byte swap
                 * in the next iteration without carry.
                 */
                sum = (sum >> 16) + (sum & 0xffff);
        }
        /* Final fold to 16 bits and ones-complement. */
        final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
        final_acc = (final_acc >> 16) + (final_acc & 0xffff);
        return ~final_acc & 0xffff;
}
  219 
  220 #else
  221 /* 64bit version */
int
cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
        int mlen;
        uint64_t sum, partial;
        unsigned int final_acc;
        uint8_t *data;
        bool needs_swap, started_on_odd;

        /*
         * 64bit flavour: 64bit accumulator operating on 32bit loads.
         *
         * m:           mbuf chain containing the bytes to checksum
         * len:         number of bytes to checksum
         * off:         byte offset into the chain where checksumming starts
         * initial_sum: caller-supplied seed (e.g. pseudo-header partial sum)
         *
         * Returns the ones-complement Internet checksum in the low 16 bits,
         * or -1 if the mbuf chain is shorter than off + len bytes.
         *
         * started_on_odd tracks whether the next byte to be consumed sits at
         * an odd offset within the overall byte stream; segments starting on
         * an odd stream offset produce byte-swapped words, compensated by
         * rotating 'partial' by 8 bits before folding (needs_swap).
         */
        KASSERT(len >= 0);
        KASSERT(off >= 0);

        needs_swap = false;
        started_on_odd = false;
        /* A 64bit accumulator can take the full 32bit seed unfolded. */
        sum = initial_sum;

        /* Consume the initial offset; locate the first byte to sum. */
        for (;;) {
                if (__predict_false(m == NULL)) {
                        printf("in_cksum: out of data\n");
                        return -1;
                }
                mlen = m->m_len;
                if (mlen > off) {
                        /* Offset lands inside this mbuf; enter main loop. */
                        mlen -= off;
                        data = mtod(m, uint8_t *) + off;
                        goto post_initial_offset;
                }
                off -= mlen;
                if (len == 0)
                        break;
                m = m->m_next;
        }

        /* Main loop: one iteration per mbuf until len bytes are summed. */
        for (; len > 0; m = m->m_next) {
                if (__predict_false(m == NULL)) {
                        printf("in_cksum: out of data\n");
                        return -1;
                }
                mlen = m->m_len;
                data = mtod(m, uint8_t *);
 post_initial_offset:
                if (mlen == 0)
                        continue;
                if (mlen > len)
                        mlen = len;
                len -= mlen;

                partial = 0;
                if ((uintptr_t)data & 1) {
                        /* Align on word boundary */
                        started_on_odd = !started_on_odd;
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        partial = *data << 8;
#else
                        partial = *data;
#endif
                        ++data;
                        --mlen;
                }
                /* Latch swap decision for this mbuf's word-aligned run. */
                needs_swap = started_on_odd;
                /* Reach 4-byte alignment for the 32bit loads below. */
                if ((uintptr_t)data & 2) {
                        if (mlen < 2)
                                goto trailing_bytes;
                        partial += *(uint16_t *)data;
                        data += 2;
                        mlen -= 2;
                }
                /* Unrolled core: 16 x 32bit loads per iteration. */
                while (mlen >= 64) {
                        __builtin_prefetch(data + 32);
                        __builtin_prefetch(data + 64);
                        partial += *(uint32_t *)data;
                        partial += *(uint32_t *)(data + 4);
                        partial += *(uint32_t *)(data + 8);
                        partial += *(uint32_t *)(data + 12);
                        partial += *(uint32_t *)(data + 16);
                        partial += *(uint32_t *)(data + 20);
                        partial += *(uint32_t *)(data + 24);
                        partial += *(uint32_t *)(data + 28);
                        partial += *(uint32_t *)(data + 32);
                        partial += *(uint32_t *)(data + 36);
                        partial += *(uint32_t *)(data + 40);
                        partial += *(uint32_t *)(data + 44);
                        partial += *(uint32_t *)(data + 48);
                        partial += *(uint32_t *)(data + 52);
                        partial += *(uint32_t *)(data + 56);
                        partial += *(uint32_t *)(data + 60);
                        data += 64;
                        mlen -= 64;
                        /*
                         * Fold early once the top two bits are set, so
                         * neither the byte rotation below nor further
                         * additions can overflow the 64bit accumulator.
                         */
                        if (__predict_false(partial & (3ULL << 62))) {
                                if (needs_swap)
                                        partial = (partial << 8) + (partial >> 56);
                                sum += (partial >> 32);
                                sum += (partial & 0xffffffff);
                                partial = 0;
                        }
                }
                /*
                 * mlen is not updated below as the remaining tests
                 * are using bit masks, which are not affected.
                 */
                if (mlen & 32) {
                        partial += *(uint32_t *)data;
                        partial += *(uint32_t *)(data + 4);
                        partial += *(uint32_t *)(data + 8);
                        partial += *(uint32_t *)(data + 12);
                        partial += *(uint32_t *)(data + 16);
                        partial += *(uint32_t *)(data + 20);
                        partial += *(uint32_t *)(data + 24);
                        partial += *(uint32_t *)(data + 28);
                        data += 32;
                }
                if (mlen & 16) {
                        partial += *(uint32_t *)data;
                        partial += *(uint32_t *)(data + 4);
                        partial += *(uint32_t *)(data + 8);
                        partial += *(uint32_t *)(data + 12);
                        data += 16;
                }
                if (mlen & 8) {
                        partial += *(uint32_t *)data;
                        partial += *(uint32_t *)(data + 4);
                        data += 8;
                }
                if (mlen & 4) {
                        partial += *(uint32_t *)data;
                        data += 4;
                }
                if (mlen & 2) {
                        partial += *(uint16_t *)data;
                        data += 2;
                }
 trailing_bytes:
                if (mlen & 1) {
                        /* Trailing odd byte: next segment starts odd. */
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        partial += *data;
#else
                        partial += *data << 8;
#endif
                        started_on_odd = !started_on_odd;
                }

                /* Rotate by 8 to undo the odd-offset byte swap. */
                if (needs_swap)
                        partial = (partial << 8) + (partial >> 56);
                sum += (partial >> 32) + (partial & 0xffffffff);
                /*
                 * Reduce sum to allow potential byte swap
                 * in the next iteration without carry.
                 */
                sum = (sum >> 32) + (sum & 0xffffffff);
        }
        /* Final fold of all four 16bit lanes and ones-complement. */
        final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
            ((sum >> 16) & 0xffff) + (sum & 0xffff);
        final_acc = (final_acc >> 16) + (final_acc & 0xffff);
        final_acc = (final_acc >> 16) + (final_acc & 0xffff);
        return ~final_acc & 0xffff;
}
  378 #endif

Cache object: 1ae7efbf2d49813f81492f200c7fcf14


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.