The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/in_cksum_arm.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: in_cksum_arm.S,v 1.2 2003/09/23 10:01:36 scw Exp $     */
    2 
    3 /*-
    4  * Copyright 2003 Wasabi Systems, Inc.
    5  * All rights reserved.
    6  *
    7  * Written by Steve C. Woodford for Wasabi Systems, Inc.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed for the NetBSD Project by
   20  *      Wasabi Systems, Inc.
   21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
   22  *    or promote products derived from this software without specific prior
   23  *    written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
   29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35  * POSSIBILITY OF SUCH DAMAGE.
   36  *
   37  */
   38 
   39 /*
   40  * Hand-optimised in_cksum() and do_cksum() implementations for ARM/armv5e
   41  */
   42 
   43 #include "opt_inet.h"
   44 
   45 #include <machine/asm.h>
   46 #include "assym.s"
   47 __FBSDID("$FreeBSD: releng/10.2/sys/arm/arm/in_cksum_arm.S 275767 2014-12-14 16:28:53Z andrew $");
   48 
   49         .syntax unified
   50 /*
   51  * int in_cksum(struct mbuf *m, int len)
   52  *
       * Walks the mbuf chain starting at 'm', sums up to 'len' bytes of its
       * data with L_cksumdata, and returns in r0 the 16-bit one's complement
       * of the folded sum.
       *
   53  * Entry:
   54  *      r0      m
   55  *      r1      len
   56  *
       * Register use inside the loop (L_cksumdata is documented to clobber
       * r0-r7, so all loop state is kept in r8-r11 and ip):
       *      r8      running 32-bit sum, carry folded back in after each add
       *      r9      bytes of 'len' not yet summed
       *      r10     bytes summed so far
       *      r11     (bytes summed so far) XOR (address of the chunk's data)
       *      ip      next mbuf in the chain
       *
       * The loop exits only when the next-mbuf pointer is NULL.  Once r9 has
       * reached zero the remaining mbufs are clamped to a zero-length chunk
       * and contribute nothing; if the chain holds fewer than 'len' bytes the
       * shortfall is not reported.
       *
   57  * NOTE: Assumes 'm' is *never* NULL.
   58  */
   59 /* LINTSTUB: Func: int in_cksum(struct mbuf *, int) */
   60 ENTRY(in_cksum)
   61         stmfd   sp!, {r4-r11,lr}        /* Save r4-r11 and the return address */
   62         mov     r8, #0x00               /* r8 = running sum */
   63         mov     r9, r1                  /* r9 = bytes still to sum */
   64         mov     r10, #0x00              /* r10 = bytes summed so far */
   65         mov     ip, r0                  /* ip = current mbuf */
   66 
   67 .Lin_cksum_loop:
   68         ldr     r1, [ip, #(M_LEN)]      /* r1 = this mbuf's data length */
   69         ldr     r0, [ip, #(M_DATA)]     /* r0 = this mbuf's data address */
   70         ldr     ip, [ip, #(M_NEXT)]     /* ip = next mbuf; NULL ends the loop */
   71 .Lin_cksum_entry4:                      /* Not referenced anywhere in the visible source */
   72         cmp     r9, r1
   73         movlt   r1, r9                  /* r1 = min(remaining, mbuf length), signed */
   74         sub     r9, r9, r1              /* Take the chunk off the remaining count */
   75         eor     r11, r10, r0            /* Bit 0 set: offset parity != address parity */
   76         add     r10, r10, r1            /* Advance the running offset */
   77         adds    r2, r1, #0x00           /* r2 = chunk length; Z set when it is zero */
   78         blne    _ASM_LABEL(L_cksumdata) /* Non-empty chunk: r2 = its 32-bit sum */
   79         tst     r11, #0x01
   80         movne   r2, r2, ror #8          /* Parities differ: rotate partial sum by one byte */
   81         adds    r8, r8, r2
   82         adc     r8, r8, #0x00           /* Fold the carry back into the sum */
   83         cmp     ip, #0x00
   84         bne     .Lin_cksum_loop
   85 
              /* Fold the 32-bit sum to 16 bits and complement it */
   86         mov     r1, #0xff
   87         orr     r1, r1, #0xff00         /* r1 = 0xffff */
   88         and     r0, r8, r1              /* r0 = low half of the sum */
   89         add     r0, r0, r8, lsr #16     /* ... plus the high half */
   90         add     r0, r0, r0, lsr #16     /* Add back any carry out of bit 15 */
   91         and     r0, r0, r1              /* Keep the low 16 bits */
   92         eor     r0, r0, r1              /* One's complement of the 16-bit sum */
   93         ldmfd   sp!, {r4-r11,pc}        /* Restore registers and return */
   94 END(in_cksum)
   95 
   96 ENTRY(do_cksum)
              /*
               * Sum a single flat buffer with L_cksumdata.
               *
               * Entry:
               *      r0      Pointer to buffer
               *      r1      Buffer length
               *
               * Returns:
               *      r0      Accumulated 32-bit sum as produced by L_cksumdata
               *              (not folded to 16 bits, not complemented);
               *              0 when the length is zero
               */
   97         stmfd   sp!, {r4-r7, lr}        /* L_cksumdata clobbers r0-r7 */
              /*
               * Skip the call for an empty buffer, exactly as in_cksum does.
               * With a zero length L_cksumdata can reach its 1-3 byte tail,
               * which always loads and sums the byte at [r0].
               */
              adds    r2, r1, #0x00           /* r2 = length; Z set when it is zero */
   98         blne    _ASM_LABEL(L_cksumdata) /* Non-empty: r2 = 32-bit sum */
   99         mov     r0, r2                  /* Return the sum in r0 */
  100         ldmfd   sp!, {r4-r7, pc}        /* Restore registers and return */
  101 END(do_cksum)
  102 
  103 /*
  104  * The main in*_cksum() workhorse...
  105  *
       * Adds up the buffer as a 32-bit sum, folding the carry back into the
       * sum.  The result is neither folded to 16 bits nor complemented.
       * Leading and trailing odd bytes are placed according to the parity of
       * their address and the target endianness.
       *
  106  * Entry parameters:
  107  *      r0      Pointer to buffer
  108  *      r1      Buffer length
  109  *      lr      Return address
  110  *
  111  * Returns:
  112  *      r2      Accumulated 32-bit sum
  113  *
  114  * Clobbers:
  115  *      r0-r7
  116  */
      /*
       * The condition flags are overwritten as well.
       *
       * r1 is expected to be non-zero.  With r1 == 0 the code can reach
       * .Lcksumdata_endgame (for any r0 that is not word aligned, and for
       * every r0 when _ARM_ARCH_5E is defined), and that path always loads
       * and sums the byte at [r0].  in_cksum only calls this routine with a
       * non-zero length.
       *
       * The carry flag is kept live across long runs of adcs; the loads and
       * pld instructions placed between them do not change the flags.
       */
  117 /* LINTSTUB: Ignore */
  118 ASENTRY_NP(L_cksumdata)
  119 #ifdef _ARM_ARCH_5E
  120         pld     [r0]                    /* Pre-fetch the start of the buffer */
  121 #endif
  122         mov     r2, #0                  /* r2 = running sum */
  123 
  124         /* We first have to word-align the buffer.  */
  125         ands    r7, r0, #0x03
  126         beq     .Lcksumdata_wordaligned
  127         rsb     r7, r7, #0x04           /* r7 = bytes up to the next word boundary (1-3) */
  128         cmp     r1, r7                  /* Enough bytes left to make it? */
  129         blt     .Lcksumdata_endgame
  130         cmp     r7, #0x02               /* Flags stay live down to the orr's below */
  131         ldrb    r4, [r0], #0x01         /* Fetch 1st byte */
  132         ldrbge  r5, [r0], #0x01         /* Fetch 2nd byte */
  133         movlt   r5, #0x00
  134         ldrbgt  r6, [r0], #0x01         /* Fetch 3rd byte */
  135         movle   r6, #0x00
  136         /* Combine the three bytes depending on endianness and alignment */
              /* eq: r7 == 2, buffer began on an even address; ne: r7 is 1 or 3, odd address */
  137 #ifdef __ARMEB__
  138         orreq   r2, r5, r4, lsl #8
  139         orreq   r2, r2, r6, lsl #24
  140         orrne   r2, r4, r5, lsl #8
  141         orrne   r2, r2, r6, lsl #16
  142 #else
  143         orreq   r2, r4, r5, lsl #8
  144         orreq   r2, r2, r6, lsl #16
  145         orrne   r2, r5, r4, lsl #8
  146         orrne   r2, r2, r6, lsl #24
  147 #endif
  148         subs    r1, r1, r7              /* Update length */
  149         RETeq                   /* All done? */
  150 
  151         /* Buffer is now word aligned */
  152 .Lcksumdata_wordaligned:
  153 #ifdef _ARM_ARCH_5E
  154         cmp     r1, #0x04               /* Less than 4 bytes left? */
  155         blt     .Lcksumdata_endgame     /* Yup */
  156 
  157         /* Now quad-align, if necessary */
  158         ands    r7, r0, #0x04           /* r7 = 0 when r0 is already 8-byte aligned */
  159         ldrne   r7, [r0], #0x04         /* Otherwise r7 = leading word, summed later */
  160         subne   r1, r1, #0x04
  161         subs    r1, r1, #0x40           /* r1 = bytes left minus 64 */
  162         blt     .Lcksumdata_bigloop_end /* Note: C flag clear if branch taken */
  163 
  164         /*
  165          * Buffer is now quad aligned. Sum 64 bytes at a time.
  166          * Note: First ldrd is hoisted above the loop, together with
  167          * setting r6 to zero to avoid stalling for results in the
  168          * loop. (r7 is live, from above).
  169          */
              /*
               * Each pass sums the r6/r7 pair left pending by the previous
               * pass (or by the set-up above) plus seven freshly loaded
               * pairs; the eighth pair loaded in a pass is left pending in
               * r6/r7 for the next pass or for the code after the loop.
               */
  170         ldrd    r4, [r0], #0x08         /* r4/r5 = first two words */
  171         mov     r6, #0x00
  172 .Lcksumdata_bigloop:
  173         pld     [r0, #0x18]
  174         adds    r2, r2, r6              /* Starts a new carry chain */
  175         adcs    r2, r2, r7
  176         ldrd    r6, [r0], #0x08
  177         adcs    r2, r2, r4
  178         adcs    r2, r2, r5
  179         ldrd    r4, [r0], #0x08
  180         adcs    r2, r2, r6
  181         adcs    r2, r2, r7
  182         ldrd    r6, [r0], #0x08
  183         adcs    r2, r2, r4
  184         adcs    r2, r2, r5
  185         ldrd    r4, [r0], #0x08
  186         adcs    r2, r2, r6
  187         adcs    r2, r2, r7
  188         pld     [r0, #0x18]
  189         ldrd    r6, [r0], #0x08
  190         adcs    r2, r2, r4
  191         adcs    r2, r2, r5
  192         ldrd    r4, [r0], #0x08
  193         adcs    r2, r2, r6
  194         adcs    r2, r2, r7
  195         ldrd    r6, [r0], #0x08
  196         adcs    r2, r2, r4
  197         adcs    r2, r2, r5
  198         adc     r2, r2, #0x00           /* Fold the final carry into the sum */
  199         subs    r1, r1, #0x40           /* Another 64 bytes available? */
  200         ldrdge  r4, [r0], #0x08         /* Yes: pre-load the first pair of the next pass */
  201         bge     .Lcksumdata_bigloop
  202 
  203         adds    r2, r2, r6              /* r6/r7 still need summing */
  204 .Lcksumdata_bigloop_end:
  205         adcs    r2, r2, r7              /* Also adds the pending quad-align word (or 0) */
  206         adc     r2, r2, #0x00
  207 
  208 #else   /* !_ARM_ARCH_5E */
  209 
  210         subs    r1, r1, #0x40           /* r1 = bytes left minus 64 */
  211         blt     .Lcksumdata_bigloop_end
  212 
              /* Sum 64 bytes per pass: four loads of four words each */
  213 .Lcksumdata_bigloop:
  214         ldmia   r0!, {r3, r4, r5, r6}
  215         adds    r2, r2, r3
  216         adcs    r2, r2, r4
  217         adcs    r2, r2, r5
  218         ldmia   r0!, {r3, r4, r5, r7}
  219         adcs    r2, r2, r6
  220         adcs    r2, r2, r3
  221         adcs    r2, r2, r4
  222         adcs    r2, r2, r5
  223         ldmia   r0!, {r3, r4, r5, r6}
  224         adcs    r2, r2, r7
  225         adcs    r2, r2, r3
  226         adcs    r2, r2, r4
  227         adcs    r2, r2, r5
  228         ldmia   r0!, {r3, r4, r5, r7}
  229         adcs    r2, r2, r6
  230         adcs    r2, r2, r3
  231         adcs    r2, r2, r4
  232         adcs    r2, r2, r5
  233         adcs    r2, r2, r7
  234         adc     r2, r2, #0x00           /* Fold the final carry into the sum */
  235         subs    r1, r1, #0x40
  236         bge     .Lcksumdata_bigloop
  237 .Lcksumdata_bigloop_end:
  238 #endif
  239 
  240         adds    r1, r1, #0x40           /* Undo the -64 bias: r1 = bytes left */
  241         RETeq
  242         cmp     r1, #0x20               /* At least 32 bytes left? Flags used below */
  243 
  244 #ifdef _ARM_ARCH_5E
  245         ldrdge  r4, [r0], #0x08         /* Avoid stalling pld and result */
  246         blt     .Lcksumdata_less_than_32
  247         pld     [r0, #0x18]
  248         ldrd    r6, [r0], #0x08
  249         adds    r2, r2, r4
  250         adcs    r2, r2, r5
  251         ldrd    r4, [r0], #0x08
  252         adcs    r2, r2, r6
  253         adcs    r2, r2, r7
  254         ldrd    r6, [r0], #0x08
  255         adcs    r2, r2, r4
  256         adcs    r2, r2, r5
  257         adcs    r2, r2, r6              /* XXX: Unavoidable result stall */
  258         adcs    r2, r2, r7
  259 #else
  260         blt     .Lcksumdata_less_than_32
  261         ldmia   r0!, {r3, r4, r5, r6}
  262         adds    r2, r2, r3
  263         adcs    r2, r2, r4
  264         adcs    r2, r2, r5
  265         ldmia   r0!, {r3, r4, r5, r7}
  266         adcs    r2, r2, r6
  267         adcs    r2, r2, r3
  268         adcs    r2, r2, r4
  269         adcs    r2, r2, r5
  270         adcs    r2, r2, r7
  271 #endif
  272         adc     r2, r2, #0x00           /* Fold the final carry into the sum */
  273         subs    r1, r1, #0x20           /* 32 bytes consumed */
  274         RETeq
  275 
  276 .Lcksumdata_less_than_32:
  277         /* There are less than 32 bytes left */
              /*
               * Computed jump into the three unrolled 8-byte groups below.
               * Each group is three 4-byte instructions (12 bytes of code),
               * so the number of data bytes to skip (24 - r3) is scaled by
               * 1.5 to give a code offset.  This relies on ARM-state pc
               * reading as the address of the current instruction plus 8,
               * i.e. the first ldmia after the nop.  When the offset is zero
               * the addne is not executed and control falls through the nop.
               * No instruction may be added between the addne and the end of
               * the third group without updating this arithmetic.
               */
  278         and     r3, r1, #0x18           /* r3 = bytes in whole 8-byte groups: 0/8/16/24 */
  279         rsb     r4, r3, #0x18           /* r4 = 24 - r3 = data bytes of groups to skip */
  280         sub     r1, r1, r3              /* r1 = bytes left after the groups (0-7) */
  281         adds    r4, r4, r4, lsr #1      /* Side effect: Clear carry flag */
  282         addne   pc, pc, r4
  283         nop
  284 
  285 /*
  286  * Note: We use ldm here, even on armv5e, since the combined issue/result
  287  * latencies for ldm and ldrd are the same. Using ldm avoids needless #ifdefs.
  288  */
  289         /* At least 24 bytes remaining... */
  290         ldmia   r0!, {r4, r5}
  291         adcs    r2, r2, r4
  292         adcs    r2, r2, r5
  293 
  294         /* At least 16 bytes remaining... */
  295         ldmia   r0!, {r4, r5}
  296         adcs    r2, r2, r4
  297         adcs    r2, r2, r5
  298 
  299         /* At least 8 bytes remaining... */
  300         ldmia   r0!, {r4, r5}
  301         adcs    r2, r2, r4
  302         adcs    r2, r2, r5
  303 
  304         /* Less than 8 bytes remaining... */
  305         adc     r2, r2, #0x00           /* Fold the carry left by the groups above */
  306         subs    r1, r1, #0x04           /* A whole word left? */
  307         blt     .Lcksumdata_lessthan4
  308 
  309         ldr     r4, [r0], #0x04
  310         sub     r1, r1, #0x04           /* Pre-bias again; undone at .Lcksumdata_lessthan4 */
  311         adds    r2, r2, r4
  312         adc     r2, r2, #0x00
  313 
  314         /* Deal with < 4 bytes remaining */
  315 .Lcksumdata_lessthan4:
  316         adds    r1, r1, #0x04           /* Undo the -4 bias: r1 = bytes left (0-3) */
  317         RETeq
  318 
  319         /* Deal with 1 to 3 remaining bytes, possibly misaligned */
              /* The first byte is loaded unconditionally, so r1 must be non-zero here */
  320 .Lcksumdata_endgame:
  321         ldrb    r3, [r0]                /* Fetch first byte */
  322         cmp     r1, #0x02
  323         ldrbge  r4, [r0, #0x01]         /* Fetch 2nd and 3rd as necessary */
  324         movlt   r4, #0x00
  325         ldrbgt  r5, [r0, #0x02]
  326         movle   r5, #0x00
  327         /* Combine the three bytes depending on endianness and alignment */
  328         tst     r0, #0x01               /* eq: r0 is even; ne: r0 is odd */
  329 #ifdef __ARMEB__
  330         orreq   r3, r4, r3, lsl #8
  331         orreq   r3, r3, r5, lsl #24
  332         orrne   r3, r3, r4, lsl #8
  333         orrne   r3, r3, r5, lsl #16
  334 #else
  335         orreq   r3, r3, r4, lsl #8
  336         orreq   r3, r3, r5, lsl #16
  337         orrne   r3, r4, r3, lsl #8
  338         orrne   r3, r3, r5, lsl #24
  339 #endif
  340         adds    r2, r2, r3
  341         adc     r2, r2, #0x00           /* Fold the carry into the sum */
  342         RET
  343 END(L_cksumdata)
  344 

Cache object: 2c5b28ba3b55be4fcd3b84f48fe08750


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.