The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/cpu_in_cksum.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: cpu_in_cksum.c,v 1.2 2018/08/28 07:28:01 rin Exp $     */
    2 /*-
    3  * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  *
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in
   14  *    the documentation and/or other materials provided with the
   15  *    distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   18  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   20  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
   21  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   22  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
   23  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   25  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   26  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   27  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __KERNEL_RCSID(0, "$NetBSD: cpu_in_cksum.c,v 1.2 2018/08/28 07:28:01 rin Exp $");
   33 
   34 #include <sys/param.h>
   35 #include <sys/endian.h>
   36 #include <sys/mbuf.h>
   37 #ifdef _KERNEL
   38 #include <sys/systm.h>
   39 #else
   40 #include <assert.h>
   41 #include <stdbool.h>
   42 #include <stdio.h>
   43 
   44 #define KASSERT(x) assert(x)
   45 #endif
   46 
   47 #include <machine/limits.h>
   48 
   49 #include <netinet/in.h>
   50 
   51 #ifndef _KERNEL
   52 int     cpu_in_cksum(struct mbuf*, int, int, uint32_t);
   53 #endif
   54 
   55 /*
   56  * Checksum routine for Internet Protocol family headers (Portable Version).
   57  *
   58  * This routine is very heavily used in the network
   59  * code and should be modified for each CPU to be as fast as possible.
   60  *
   61  * A discussion of different implementation techniques can be found in
   62  * RFC 1071.
   63  *
   64  * The default implementation for 32bit architectures is using
   65  * a 32bit accumulator and operating on 16bit operands.
   66  *
   67  * The default implementation for 64bit architectures is using
   68  * a 64bit accumulator and operating on 32bit operands.
   69  *
   70  * Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core
   71  * of the inner loop. After each iteration of the inner loop, a partial
   72  * reduction is done to avoid carry in long packets.
   73  */
   74 
   75 #if ULONG_MAX == 0xffffffffUL
   76 /* 32bit version */
   77 int
   78 cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
   79 {
   80         int mlen;
   81         uint32_t sum, partial;
   82         unsigned int final_acc;
   83         uint8_t *data;
   84         bool needs_swap, started_on_odd;
   85 
   86         KASSERT(len >= 0);
   87         KASSERT(off >= 0);
   88 
   89         needs_swap = false;
   90         started_on_odd = false;
   91         sum = (initial_sum >> 16) + (initial_sum & 0xffff);
   92 
   93         for (;;) {
   94                 if (__predict_false(m == NULL)) {
   95                         printf("in_cksum: out of data\n");
   96                         return -1;
   97                 }
   98                 mlen = m->m_len;
   99                 if (mlen > off) {
  100                         mlen -= off;
  101                         data = mtod(m, uint8_t *) + off;
  102                         goto post_initial_offset;
  103                 }
  104                 off -= mlen;
  105                 if (len == 0)
  106                         break;
  107                 m = m->m_next;
  108         }
  109 
  110         for (; len > 0; m = m->m_next) {
  111                 if (__predict_false(m == NULL)) {
  112                         printf("in_cksum: out of data\n");
  113                         return -1;
  114                 }
  115                 mlen = m->m_len;
  116                 data = mtod(m, uint8_t *);
  117  post_initial_offset:
  118                 if (mlen == 0)
  119                         continue;
  120                 if (mlen > len)
  121                         mlen = len;
  122                 len -= mlen;
  123 
  124                 partial = 0;
  125                 if ((uintptr_t)data & 1) {
  126                         /* Align on word boundary */
  127                         started_on_odd = !started_on_odd;
  128 #if _BYTE_ORDER == _LITTLE_ENDIAN
  129                         partial = *data << 8;
  130 #else
  131                         partial = *data;
  132 #endif
  133                         ++data;
  134                         --mlen;
  135                 }
  136                 needs_swap = started_on_odd;
  137                 while (mlen >= 32) {
  138                         __builtin_prefetch(data + 32);
  139                         partial += *(uint16_t *)data;
  140                         partial += *(uint16_t *)(data + 2);
  141                         partial += *(uint16_t *)(data + 4);
  142                         partial += *(uint16_t *)(data + 6);
  143                         partial += *(uint16_t *)(data + 8);
  144                         partial += *(uint16_t *)(data + 10);
  145                         partial += *(uint16_t *)(data + 12);
  146                         partial += *(uint16_t *)(data + 14);
  147                         partial += *(uint16_t *)(data + 16);
  148                         partial += *(uint16_t *)(data + 18);
  149                         partial += *(uint16_t *)(data + 20);
  150                         partial += *(uint16_t *)(data + 22);
  151                         partial += *(uint16_t *)(data + 24);
  152                         partial += *(uint16_t *)(data + 26);
  153                         partial += *(uint16_t *)(data + 28);
  154                         partial += *(uint16_t *)(data + 30);
  155                         data += 32;
  156                         mlen -= 32;
  157                         if (__predict_false(partial & 0xc0000000)) {
  158                                 if (needs_swap)
  159                                         partial = (partial << 8) + (partial >> 24);
  160                                 sum += (partial >> 16);
  161                                 sum += (partial & 0xffff);
  162                                 partial = 0;
  163                         }
  164                 }
  165                 /*
  166                  * mlen is not updated below as the remaining tests
  167                  * are using bit masks, which are not affected.
  168                  */
  169                 if (mlen & 16) {
  170                         partial += *(uint16_t *)data;
  171                         partial += *(uint16_t *)(data + 2);
  172                         partial += *(uint16_t *)(data + 4);
  173                         partial += *(uint16_t *)(data + 6);
  174                         partial += *(uint16_t *)(data + 8);
  175                         partial += *(uint16_t *)(data + 10);
  176                         partial += *(uint16_t *)(data + 12);
  177                         partial += *(uint16_t *)(data + 14);
  178                         data += 16;
  179                 }
  180                 if (mlen & 8) {
  181                         partial += *(uint16_t *)data;
  182                         partial += *(uint16_t *)(data + 2);
  183                         partial += *(uint16_t *)(data + 4);
  184                         partial += *(uint16_t *)(data + 6);
  185                         data += 8;
  186                 }
  187                 if (mlen & 4) {
  188                         partial += *(uint16_t *)data;
  189                         partial += *(uint16_t *)(data + 2);
  190                         data += 4;
  191                 }
  192                 if (mlen & 2) {
  193                         partial += *(uint16_t *)data;
  194                         data += 2;
  195                 }
  196                 if (mlen & 1) {
  197 #if _BYTE_ORDER == _LITTLE_ENDIAN
  198                         partial += *data;
  199 #else
  200                         partial += *data << 8;
  201 #endif
  202                         started_on_odd = !started_on_odd;
  203                 }
  204 
  205                 if (needs_swap)
  206                         partial = (partial << 8) + (partial >> 24);
  207                 sum += (partial >> 16) + (partial & 0xffff);
  208                 /*
  209                  * Reduce sum to allow potential byte swap
  210                  * in the next iteration without carry.
  211                  */
  212                 sum = (sum >> 16) + (sum & 0xffff);
  213         }
  214         final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
  215         final_acc = (final_acc >> 16) + (final_acc & 0xffff);
  216         return ~final_acc & 0xffff;
  217 }
  218 
  219 #else
  220 /* 64bit version */
/*
 * 64-bit variant: 64-bit accumulator, unrolled 32-bit loads.
 *
 * m           mbuf chain holding the data to checksum
 * len         number of bytes to sum
 * off         byte offset into the chain at which summing starts
 * initial_sum caller-supplied partial sum (e.g. pseudo-header) to fold in
 *
 * Returns the ones-complement checksum in the low 16 bits, or -1 if the
 * chain runs out of data before len bytes have been consumed.
 */
int
cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
        int mlen;
        uint64_t sum, partial;
        unsigned int final_acc;
        uint8_t *data;
        bool needs_swap, started_on_odd;

        KASSERT(len >= 0);
        KASSERT(off >= 0);

        needs_swap = false;
        started_on_odd = false;
        /* The 64-bit accumulator can take the full 32-bit seed directly. */
        sum = initial_sum;

        /*
         * Consume the initial offset: walk mbufs until the one containing
         * byte 'off' is found, then jump into the middle of the main loop
         * with 'data'/'mlen' describing the remainder of that mbuf.
         */
        for (;;) {
                if (__predict_false(m == NULL)) {
                        printf("in_cksum: out of data\n");
                        return -1;
                }
                mlen = m->m_len;
                if (mlen > off) {
                        mlen -= off;
                        data = mtod(m, uint8_t *) + off;
                        goto post_initial_offset;
                }
                off -= mlen;
                if (len == 0)
                        break;
                m = m->m_next;
        }

        for (; len > 0; m = m->m_next) {
                if (__predict_false(m == NULL)) {
                        printf("in_cksum: out of data\n");
                        return -1;
                }
                mlen = m->m_len;
                data = mtod(m, uint8_t *);
 post_initial_offset:
                if (mlen == 0)
                        continue;
                if (mlen > len)
                        mlen = len;
                len -= mlen;

                partial = 0;
                if ((uintptr_t)data & 1) {
                        /*
                         * Align on word boundary.  Consuming a single byte
                         * shifts the 16-bit word grid by one byte, so flip
                         * started_on_odd; the per-fragment sum is
                         * byte-swapped before folding (see needs_swap).
                         */
                        started_on_odd = !started_on_odd;
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        partial = *data << 8;
#else
                        partial = *data;
#endif
                        ++data;
                        --mlen;
                }
                needs_swap = started_on_odd;
                /*
                 * Second alignment step: consume one 16-bit word so the
                 * 32-bit loads below are 4-byte aligned.  If fewer than two
                 * bytes remain, skip straight to the odd-byte handling.
                 */
                if ((uintptr_t)data & 2) {
                        if (mlen < 2)
                                goto trailing_bytes;
                        partial += *(uint16_t *)data;
                        data += 2;
                        mlen -= 2;
                }
                /*
                 * Unrolled core: sixteen 32-bit loads (64 bytes) per
                 * iteration.
                 */
                while (mlen >= 64) {
                        __builtin_prefetch(data + 32);
                        __builtin_prefetch(data + 64);
                        partial += *(uint32_t *)data;
                        partial += *(uint32_t *)(data + 4);
                        partial += *(uint32_t *)(data + 8);
                        partial += *(uint32_t *)(data + 12);
                        partial += *(uint32_t *)(data + 16);
                        partial += *(uint32_t *)(data + 20);
                        partial += *(uint32_t *)(data + 24);
                        partial += *(uint32_t *)(data + 28);
                        partial += *(uint32_t *)(data + 32);
                        partial += *(uint32_t *)(data + 36);
                        partial += *(uint32_t *)(data + 40);
                        partial += *(uint32_t *)(data + 44);
                        partial += *(uint32_t *)(data + 48);
                        partial += *(uint32_t *)(data + 52);
                        partial += *(uint32_t *)(data + 56);
                        partial += *(uint32_t *)(data + 60);
                        data += 64;
                        mlen -= 64;
                        /*
                         * If the two top bits are set, 'partial' is close
                         * enough to overflowing that the byte swap below
                         * could lose a carry: fold it into 'sum' now.
                         */
                        if (__predict_false(partial & (3ULL << 62))) {
                                if (needs_swap)
                                        partial = (partial << 8) + (partial >> 56);
                                sum += (partial >> 32);
                                sum += (partial & 0xffffffff);
                                partial = 0;
                        }
                }
                /*
                 * mlen is not updated below as the remaining tests
                 * are using bit masks, which are not affected.
                 */
                if (mlen & 32) {
                        partial += *(uint32_t *)data;
                        partial += *(uint32_t *)(data + 4);
                        partial += *(uint32_t *)(data + 8);
                        partial += *(uint32_t *)(data + 12);
                        partial += *(uint32_t *)(data + 16);
                        partial += *(uint32_t *)(data + 20);
                        partial += *(uint32_t *)(data + 24);
                        partial += *(uint32_t *)(data + 28);
                        data += 32;
                }
                if (mlen & 16) {
                        partial += *(uint32_t *)data;
                        partial += *(uint32_t *)(data + 4);
                        partial += *(uint32_t *)(data + 8);
                        partial += *(uint32_t *)(data + 12);
                        data += 16;
                }
                if (mlen & 8) {
                        partial += *(uint32_t *)data;
                        partial += *(uint32_t *)(data + 4);
                        data += 8;
                }
                if (mlen & 4) {
                        partial += *(uint32_t *)data;
                        data += 4;
                }
                if (mlen & 2) {
                        partial += *(uint16_t *)data;
                        data += 2;
                }
 trailing_bytes:
                if (mlen & 1) {
                        /*
                         * Trailing odd byte: the next fragment will start
                         * on an odd data offset, so flip started_on_odd.
                         */
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        partial += *data;
#else
                        partial += *data << 8;
#endif
                        started_on_odd = !started_on_odd;
                }

                /*
                 * Fragments that started on an odd byte summed their words
                 * shifted by one byte; rotating by 8 bits restores the
                 * proper byte order before folding into 'sum'.
                 */
                if (needs_swap)
                        partial = (partial << 8) + (partial >> 56);
                sum += (partial >> 32) + (partial & 0xffffffff);
                /*
                 * Reduce sum to allow potential byte swap
                 * in the next iteration without carry.
                 */
                sum = (sum >> 32) + (sum & 0xffffffff);
        }
        /* Fold the four 16-bit lanes of the 64-bit sum down to 16 bits. */
        final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
            ((sum >> 16) & 0xffff) + (sum & 0xffff);
        final_acc = (final_acc >> 16) + (final_acc & 0xffff);
        final_acc = (final_acc >> 16) + (final_acc & 0xffff);
        return ~final_acc & 0xffff;
}
  377 #endif

Cache object: 2163c5bfe808938ded1821811f1fbc38


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.