[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/in_cksum_arm.S

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*      $NetBSD: in_cksum_arm.S,v 1.2 2003/09/23 10:01:36 scw Exp $     */
  2 
  3 /*-
  4  * Copyright 2003 Wasabi Systems, Inc.
  5  * All rights reserved.
  6  *
  7  * Written by Steve C. Woodford for Wasabi Systems, Inc.
  8  *
  9  * Redistribution and use in source and binary forms, with or without
 10  * modification, are permitted provided that the following conditions
 11  * are met:
 12  * 1. Redistributions of source code must retain the above copyright
 13  *    notice, this list of conditions and the following disclaimer.
 14  * 2. Redistributions in binary form must reproduce the above copyright
 15  *    notice, this list of conditions and the following disclaimer in the
 16  *    documentation and/or other materials provided with the distribution.
 17  * 3. All advertising materials mentioning features or use of this software
 18  *    must display the following acknowledgement:
 19  *      This product includes software developed for the NetBSD Project by
 20  *      Wasabi Systems, Inc.
 21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 22  *    or promote products derived from this software without specific prior
 23  *    written permission.
 24  *
 25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 35  * POSSIBILITY OF SUCH DAMAGE.
 36  *
 37  */
 38 
 39 /*
 40  * Hand-optimised in_cksum() and do_cksum() implementations for ARM/armv5e
 41  */
 42 
 43 #include "opt_inet.h"
 44 
 45 #include <machine/asm.h>
 46 #include "assym.s"
 47 __FBSDID("$FreeBSD: src/sys/arm/arm/in_cksum_arm.S,v 1.7 2007/10/13 12:05:03 cognet Exp $");
 48 
 49 /*
 50  * int in_cksum(struct mbuf *m, int len)
 51  *
 52  * Entry:
 53  *      r0      m       (first mbuf of the chain to sum)
 54  *      r1      len     (maximum number of bytes to sum; compared as a signed value)
 55  * Returns in r0 the 32-bit running sum folded to 16 bits and then complemented.
 56  * NOTE: Assumes 'm' is *never* NULL.  The chain is walked until the M_NEXT link is NULL, even after 'len' bytes are consumed.
 57  */
 58 /* LINTSTUB: Func: int in_cksum(struct mbuf *, int) */
 59 ENTRY(in_cksum)
 60         stmfd   sp!, {r4-r11,lr}        /* r8-r11 are used here, r4-r7 by L_cksumdata; bl overwrites lr */
 61         mov     r8, #0x00               /* r8  = running 32-bit sum */
 62         mov     r9, r1                  /* r9  = bytes of 'len' still to be summed */
 63         mov     r10, #0x00              /* r10 = bytes summed so far (offset within the stream) */
 64         mov     ip, r0                  /* ip  = current mbuf */
 65 
 66 .Lin_cksum_loop:
 67         ldr     r1, [ip, #(M_LEN)]      /* r1 = length field; M_* offsets presumably come from assym.s - confirm */
 68         ldr     r0, [ip, #(M_DATA)]     /* r0 = data pointer of this mbuf */
 69         ldr     ip, [ip, #(M_NEXT)]     /* ip = next mbuf; tested at the bottom of the loop */
 70 .Lin_cksum_entry4:                      /* NOTE(review): no reference to this label in the code shown */
 71         cmp     r9, r1
 72         movlt   r1, r9                  /* r1 = min(remaining, mbuf length), signed compare */
 73         sub     r9, r9, r1
 74         eor     r11, r10, r0            /* bit 0 = (stream offset parity) xor (buffer address parity) */
 75         add     r10, r10, r1
 76         adds    r2, r1, #0x00           /* r2 = r1; for an empty chunk Z is set and r2 is already 0 */
 77         blne    _ASM_LABEL(L_cksumdata) /* r2 = 32-bit sum of r1 bytes at r0; clobbers r0-r7 */
 78         tst     r11, #0x01
 79         movne   r2, r2, ror #8          /* parities differ: rotate by 8 so the byte lanes line up */
 80         adds    r8, r8, r2
 81         adc     r8, r8, #0x00           /* end-around carry */
 82         cmp     ip, #0x00
 83         bne     .Lin_cksum_loop
 84                                         /* Fold r8 to 16 bits and complement it */
 85         mov     r1, #0xff
 86         orr     r1, r1, #0xff00         /* r1 = 0xffff */
 87         and     r0, r8, r1              /* low halfword */
 88         add     r0, r0, r8, lsr #16     /* plus high halfword */
 89         add     r0, r0, r0, lsr #16     /* add back the carry out of bit 15 */
 90         and     r0, r0, r1
 91         eor     r0, r0, r1              /* complement the low 16 bits */
 92         ldmfd   sp!, {r4-r11,pc}        /* restore registers; saved lr is loaded into pc */
 93 
 94 ENTRY(do_cksum)                         /* In: r0 = buffer, r1 = length in bytes.  Out: r0 = unfolded 32-bit sum from L_cksumdata, or 0 when the length is 0.  Presumably called from C; prototype not visible here - confirm */
 95         stmfd   sp!, {r4-r7, lr}        /* L_cksumdata clobbers r4-r7; bl overwrites lr */
 96         adds    r2, r1, #0x00           /* Length 0: r2 is already the answer (0), so skip the call, as in_cksum does; L_cksumdata would otherwise read and sum one byte */
 97         blne    _ASM_LABEL(L_cksumdata) /* r2 = 32-bit sum of r1 bytes at r0 */
 98         mov     r0, r2                  /* move the sum into the return register */
 99         ldmfd   sp!, {r4-r7, pc}        /* restore registers; saved lr is loaded into pc */
100 /*
101  * The main in*_cksum() workhorse: adds a buffer into a 32-bit accumulator, feeding each carry back in.
102  *
103  * Entry parameters:
104  *      r0      Pointer to buffer (any alignment)
105  *      r1      Buffer length in bytes; must be non-zero (see NOTE)
106  *      lr      Return address
107  *
108  * Returns:
109  *      r2      Accumulated 32-bit sum
110  * NOTE: with r1 == 0, a misaligned r0 (and, under _ARM_ARCH_5E, any r0) reaches .Lcksumdata_endgame, which always loads and sums one byte.
111  * Clobbers:
112  *      r0-r7 and the condition flags; the stack is not used
113  */
114 /* LINTSTUB: Ignore */
115 ASENTRY_NP(L_cksumdata)
116 #ifdef _ARM_ARCH_5E
117         pld     [r0]                    /* Pre-fetch the start of the buffer */
118 #endif
119         mov     r2, #0                  /* r2 = sum accumulator */
120 
121         /* We first have to word-align the buffer.  */
122         ands    r7, r0, #0x03           /* r7 = address & 3 */
123         beq     .Lcksumdata_wordaligned
124         rsb     r7, r7, #0x04           /* r7 = bytes needed to reach word alignment (1..3) */
125         cmp     r1, r7                  /* Enough bytes left to make it? */
126         blt     .Lcksumdata_endgame     /* No: fewer than 3 bytes in total, let the endgame sum them */
127         cmp     r7, #0x02               /* These flags stay live down to the orr's: EQ <=> start address is even */
128         ldrb    r4, [r0], #0x01         /* Fetch 1st byte */
129         ldrgeb  r5, [r0], #0x01         /* Fetch 2nd byte */
130         movlt   r5, #0x00
131         ldrgtb  r6, [r0], #0x01         /* Fetch 3rd byte */
132         movle   r6, #0x00
133         /* Combine the three bytes depending on endianness and alignment */
134 #ifdef __ARMEB__
135         orreq   r2, r5, r4, lsl #8
136         orreq   r2, r2, r6, lsl #24
137         orrne   r2, r4, r5, lsl #8
138         orrne   r2, r2, r6, lsl #16
139 #else
140         orreq   r2, r4, r5, lsl #8      /* Even start: the two bytes in memory order */
141         orreq   r2, r2, r6, lsl #16     /* r6 is 0 in the EQ case (r7 == 2) */
142         orrne   r2, r5, r4, lsl #8      /* Odd start: 1st byte lands in the high byte of a halfword */
143         orrne   r2, r2, r6, lsl #24
144 #endif
145         subs    r1, r1, r7              /* Update length */
146         RETeq                   /* All done? */
147 
148         /* Buffer is now word aligned */
149 .Lcksumdata_wordaligned:
150 #ifdef _ARM_ARCH_5E
151         cmp     r1, #0x04               /* Less than 4 bytes left? */
152         blt     .Lcksumdata_endgame     /* Yup */
153 
154         /* Now quad-align, if necessary */
155         ands    r7, r0, #0x04           /* r7 = 0 when r0 is already 8-byte aligned */
156         ldrne   r7, [r0], #0x04         /* Otherwise r7 = the odd word; it is summed later */
157         subne   r1, r1, #0x04
158         subs    r1, r1, #0x40           /* r1 is biased by -64 from here until .Lcksumdata_bigloop_end */
159         blt     .Lcksumdata_bigloop_end /* Note: C flag clear if branch taken */
160 
161         /*
162          * Buffer is now quad aligned. Sum 64 bytes at a time.
163          * Note: First ldrd is hoisted above the loop, together with
164          * setting r6 to zero to avoid stalling for results in the
165          * loop. (r7 is live, from above).
166          */
167         ldrd    r4, [r0], #0x08
168         mov     r6, #0x00
169 .Lcksumdata_bigloop:
170         pld     [r0, #0x18]
171         adds    r2, r2, r6              /* r6/r7: pair left over from the previous pass (0 and the quad-align word on the first pass) */
172         adcs    r2, r2, r7
173         ldrd    r6, [r0], #0x08
174         adcs    r2, r2, r4
175         adcs    r2, r2, r5
176         ldrd    r4, [r0], #0x08
177         adcs    r2, r2, r6
178         adcs    r2, r2, r7
179         ldrd    r6, [r0], #0x08
180         adcs    r2, r2, r4
181         adcs    r2, r2, r5
182         ldrd    r4, [r0], #0x08
183         adcs    r2, r2, r6
184         adcs    r2, r2, r7
185         pld     [r0, #0x18]
186         ldrd    r6, [r0], #0x08
187         adcs    r2, r2, r4
188         adcs    r2, r2, r5
189         ldrd    r4, [r0], #0x08
190         adcs    r2, r2, r6
191         adcs    r2, r2, r7
192         ldrd    r6, [r0], #0x08         /* This pair is summed at the top of the next pass, or after the loop */
193         adcs    r2, r2, r4
194         adcs    r2, r2, r5
195         adc     r2, r2, #0x00           /* Fold the last carry before subs overwrites the flags */
196         subs    r1, r1, #0x40
197         ldrged  r4, [r0], #0x08         /* Another 64 bytes available: issue the hoisted first load for the next pass */
198         bge     .Lcksumdata_bigloop
199 
200         adds    r2, r2, r6              /* r6/r7 still need summing */
201 .Lcksumdata_bigloop_end:
202         adcs    r2, r2, r7              /* On the direct branch from above, r7 is 0 or the quad-align word and C is clear */
203         adc     r2, r2, #0x00
204 
205 #else   /* !_ARM_ARCH_5E */
206 
207         subs    r1, r1, #0x40           /* r1 is biased by -64 from here until .Lcksumdata_bigloop_end */
208         blt     .Lcksumdata_bigloop_end
209 
210 .Lcksumdata_bigloop:
211         ldmia   r0!, {r3, r4, r5, r6}   /* Four ldmia of 16 bytes each: 64 bytes per pass */
212         adds    r2, r2, r3
213         adcs    r2, r2, r4
214         adcs    r2, r2, r5
215         ldmia   r0!, {r3, r4, r5, r7}
216         adcs    r2, r2, r6
217         adcs    r2, r2, r3
218         adcs    r2, r2, r4
219         adcs    r2, r2, r5
220         ldmia   r0!, {r3, r4, r5, r6}
221         adcs    r2, r2, r7
222         adcs    r2, r2, r3
223         adcs    r2, r2, r4
224         adcs    r2, r2, r5
225         ldmia   r0!, {r3, r4, r5, r7}
226         adcs    r2, r2, r6
227         adcs    r2, r2, r3
228         adcs    r2, r2, r4
229         adcs    r2, r2, r5
230         adcs    r2, r2, r7
231         adc     r2, r2, #0x00           /* Fold the last carry before subs overwrites the flags */
232         subs    r1, r1, #0x40
233         bge     .Lcksumdata_bigloop
234 .Lcksumdata_bigloop_end:
235 #endif
236 
237         adds    r1, r1, #0x40           /* Undo the -64 bias: r1 = bytes still to sum (0..63) */
238         RETeq
239         cmp     r1, #0x20               /* At least 32 bytes left?  Flags are used by the ldrged/blt below */
240 
241 #ifdef _ARM_ARCH_5E
242         ldrged  r4, [r0], #0x08         /* Avoid stalling pld and result */
243         blt     .Lcksumdata_less_than_32
244         pld     [r0, #0x18]
245         ldrd    r6, [r0], #0x08
246         adds    r2, r2, r4
247         adcs    r2, r2, r5
248         ldrd    r4, [r0], #0x08
249         adcs    r2, r2, r6
250         adcs    r2, r2, r7
251         ldrd    r6, [r0], #0x08
252         adcs    r2, r2, r4
253         adcs    r2, r2, r5
254         adcs    r2, r2, r6              /* XXX: Unavoidable result stall */
255         adcs    r2, r2, r7
256 #else
257         blt     .Lcksumdata_less_than_32
258         ldmia   r0!, {r3, r4, r5, r6}   /* Two ldmia of 16 bytes each: one 32-byte chunk */
259         adds    r2, r2, r3
260         adcs    r2, r2, r4
261         adcs    r2, r2, r5
262         ldmia   r0!, {r3, r4, r5, r7}
263         adcs    r2, r2, r6
264         adcs    r2, r2, r3
265         adcs    r2, r2, r4
266         adcs    r2, r2, r5
267         adcs    r2, r2, r7
268 #endif
269         adc     r2, r2, #0x00           /* Fold the carry of the 32-byte chunk */
270         subs    r1, r1, #0x20
271         RETeq
272 
273 .Lcksumdata_less_than_32:
274         /* There are less than 32 bytes left */
275         and     r3, r1, #0x18           /* r3 = bytes held in whole 8-byte pairs (0, 8, 16 or 24) */
276         rsb     r4, r3, #0x18           /* r4 = 8 * (number of pair blocks below to skip) */
277         sub     r1, r1, r3              /* r1 = 0..7 bytes left once the pairs are done */
278         adds    r4, r4, r4, lsr #1      /* r4 *= 1.5: each block below is 3 instructions (12 bytes).  Side effect: Clear carry flag */
279         addne   pc, pc, r4              /* Computed jump; pc reads as this instruction + 8, i.e. the first ldmia below */
280         nop                             /* Fills the slot between the addne and the first block; executed only when r4 == 0 */
281 
282 /*
283  * Note: We use ldm here, even on armv5e, since the combined issue/result
284  * latencies for ldm and ldrd are the same. Using ldm avoids needless #ifdefs.
285  */
286         /* At least 24 bytes remaining... */
287         ldmia   r0!, {r4, r5}
288         adcs    r2, r2, r4
289         adcs    r2, r2, r5
290 
291         /* At least 16 bytes remaining... */
292         ldmia   r0!, {r4, r5}
293         adcs    r2, r2, r4
294         adcs    r2, r2, r5
295 
296         /* At least 8 bytes remaining... */
297         ldmia   r0!, {r4, r5}
298         adcs    r2, r2, r4
299         adcs    r2, r2, r5
300 
301         /* Less than 8 bytes remaining... */
302         adc     r2, r2, #0x00           /* Fold the carry left by the pair blocks */
303         subs    r1, r1, #0x04           /* Is a whole word left? */
304         blt     .Lcksumdata_lessthan4
305 
306         ldr     r4, [r0], #0x04
307         sub     r1, r1, #0x04           /* Keeps r1 biased by -4 for the adds at .Lcksumdata_lessthan4 */
308         adds    r2, r2, r4
309         adc     r2, r2, #0x00
310 
311         /* Deal with < 4 bytes remaining */
312 .Lcksumdata_lessthan4:
313         adds    r1, r1, #0x04           /* Undo the -4 bias: r1 = 0..3 bytes left */
314         RETeq
315 
316         /* Deal with 1 to 3 remaining bytes, possibly misaligned */
317 .Lcksumdata_endgame:
318         ldrb    r3, [r0]                /* Fetch first byte */
319         cmp     r1, #0x02
320         ldrgeb  r4, [r0, #0x01]         /* Fetch 2nd and 3rd as necessary */
321         movlt   r4, #0x00
322         ldrgtb  r5, [r0, #0x02]
323         movle   r5, #0x00
324         /* Combine the three bytes depending on endianness and alignment */
325         tst     r0, #0x01               /* EQ <=> the first of these bytes sits at an even address */
326 #ifdef __ARMEB__
327         orreq   r3, r4, r3, lsl #8
328         orreq   r3, r3, r5, lsl #24
329         orrne   r3, r3, r4, lsl #8
330         orrne   r3, r3, r5, lsl #16
331 #else
332         orreq   r3, r3, r4, lsl #8      /* Even start: bytes in memory order */
333         orreq   r3, r3, r5, lsl #16
334         orrne   r3, r4, r3, lsl #8      /* Odd start: 1st byte lands in the high byte of a halfword */
335         orrne   r3, r3, r5, lsl #24
336 #endif
337         adds    r2, r2, r3
338         adc     r2, r2, #0x00           /* End-around carry */
339         RET

Cache object: f61485d3a3e46d65e003a6735cc1e845


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.