1 /* $NetBSD: in_cksum_arm.S,v 1.2 2003/09/23 10:01:36 scw Exp $ */
2
3 /*-
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 *
37 */
38
39 /*
40 * Hand-optimised in_cksum() and do_cksum() implementations for ARM/armv5e
41 */
42
43 #include "opt_inet.h"
44
45 #include <machine/asm.h>
46 #include "assym.s"
47 __FBSDID("$FreeBSD: releng/10.0/sys/arm/arm/in_cksum_arm.S 248361 2013-03-16 02:48:49Z andrew $");
48
49 /*
50 * int in_cksum(struct mbuf *m, int len)
51 * Sums up to 'len' bytes over the mbuf chain headed by 'm'.
52 * Entry:
53 * r0 m (first mbuf of the chain)
54 * r1 len (number of bytes to checksum)
55 * Returns: r0 = 32-bit sum folded to 16 bits, then complemented
56 * NOTE: Assumes 'm' is *never* NULL.
57 */
58 /* LINTSTUB: Func: int in_cksum(struct mbuf *, int) */
59 ENTRY(in_cksum)
60 stmfd sp!, {r4-r11,lr} /* L_cksumdata clobbers r0-r7; r8-r11 hold our state */
61 mov r8, #0x00 /* r8 = accumulated 32-bit sum */
62 mov r9, r1 /* r9 = bytes still to be summed */
63 mov r10, #0x00 /* r10 = bytes consumed so far (stream offset) */
64 mov ip, r0 /* ip = current mbuf */
65 /* Per mbuf: sum min(remaining, length) bytes of its data */
66 .Lin_cksum_loop:
67 ldr r1, [ip, #(M_LEN)] /* presumably m->m_len; offset comes from assym.s */
68 ldr r0, [ip, #(M_DATA)] /* presumably m->m_data */
69 ldr ip, [ip, #(M_NEXT)] /* presumably m->m_next; advance now, tested at loop end */
70 .Lin_cksum_entry4: /* Not branched to from anywhere in this file */
71 cmp r9, r1
72 movlt r1, r9 /* r1 = min(remaining, mbuf length), signed compare */
73 sub r9, r9, r1 /* remaining -= bytes taken from this mbuf */
74 eor r11, r10, r0 /* bit 0 set if stream offset and buffer address differ in parity */
75 add r10, r10, r1 /* offset += bytes taken from this mbuf */
76 adds r2, r1, #0x00 /* r2 = r1; Z set when there is nothing to sum */
77 blne _ASM_LABEL(L_cksumdata) /* r2 = partial sum; skipped (r2 stays 0) for zero length */
78 tst r11, #0x01
79 movne r2, r2, ror #8 /* Parity differed: rotate by 8 to realign the byte lanes */
80 adds r8, r8, r2
81 adc r8, r8, #0x00 /* End-around carry */
82 cmp ip, #0x00
83 bne .Lin_cksum_loop /* Loop ends only at the end of the chain, not when r9 hits 0 */
84 /* Fold the 32-bit sum to 16 bits and complement it */
85 mov r1, #0xff
86 orr r1, r1, #0xff00 /* r1 = 0xffff */
87 and r0, r8, r1 /* Low half ... */
88 add r0, r0, r8, lsr #16 /* ... plus high half */
89 add r0, r0, r0, lsr #16 /* Add back the carry out of bit 15 */
90 and r0, r0, r1
91 eor r0, r0, r1 /* 16-bit one's complement */
92 ldmfd sp!, {r4-r11,pc} /* Restore and return */
93 END(in_cksum)
94 /* do_cksum: r0 = buffer, r1 = length in bytes; returns r0 = unfolded 32-bit sum (0 for length 0) */
95 ENTRY(do_cksum)
96 stmfd sp!, {r4-r7, lr} /* L_cksumdata clobbers r0-r7; r4-r7 are callee-saved */
97 adds r2, r1, #0x00 /* r2 = length; Z set for an empty buffer, leaving r2 = 0 */
98 blne _ASM_LABEL(L_cksumdata) /* Skip when empty: the workhorse would still read one byte */
99 mov r0, r2 /* Return the sum in r0 */
100 ldmfd sp!, {r4-r7, pc}
101 END(do_cksum)
102 /*
103 * The main in*_cksum() workhorse: sums one buffer into a 32-bit word with end-around carry.
104 * Folding to 16 bits and complementing are not done here.
105 * Entry parameters:
106 * r0 Pointer to buffer
107 * r1 Buffer length in bytes; must be > 0 (with 0 the endgame path can still read and sum one byte)
108 * lr Return address
109 *
110 * Returns:
111 * r2 Accumulated 32-bit sum
112 *
113 * Clobbers:
114 * r0-r7 and the condition flags
115 */
116 /* LINTSTUB: Ignore */
117 ASENTRY_NP(L_cksumdata)
118 #ifdef _ARM_ARCH_5E
119 pld [r0] /* Pre-fetch the start of the buffer */
120 #endif
121 mov r2, #0 /* r2 = running sum */
122
123 /* We first have to word-align the buffer. */
124 ands r7, r0, #0x03 /* r7 = address & 3 */
125 beq .Lcksumdata_wordaligned /* Already word aligned */
126 rsb r7, r7, #0x04 /* r7 = 1..3 bytes needed to reach word alignment */
127 cmp r1, r7 /* Enough bytes left to make it? */
128 blt .Lcksumdata_endgame /* No: fewer than r7 (<= 3) bytes, sum them bytewise */
129 cmp r7, #0x02 /* These flags select the loads and the combine below */
130 ldrb r4, [r0], #0x01 /* Fetch 1st byte */
131 ldrgeb r5, [r0], #0x01 /* Fetch 2nd byte (r7 >= 2) */
132 movlt r5, #0x00 /* r7 == 1: no 2nd byte */
133 ldrgtb r6, [r0], #0x01 /* Fetch 3rd byte (r7 == 3) */
134 movle r6, #0x00 /* r7 <= 2: no 3rd byte */
135 /* Combine the three bytes depending on endianness and alignment; EQ = r7 == 2 (even start address), NE = odd start address */
136 #ifdef __ARMEB__
137 orreq r2, r5, r4, lsl #8
138 orreq r2, r2, r6, lsl #24
139 orrne r2, r4, r5, lsl #8
140 orrne r2, r2, r6, lsl #16
141 #else
142 orreq r2, r4, r5, lsl #8
143 orreq r2, r2, r6, lsl #16
144 orrne r2, r5, r4, lsl #8
145 orrne r2, r2, r6, lsl #24
146 #endif
147 subs r1, r1, r7 /* Update length */
148 RETeq /* All done? */
149
150 /* Buffer is now word aligned */
151 .Lcksumdata_wordaligned:
152 #ifdef _ARM_ARCH_5E
153 cmp r1, #0x04 /* Less than 4 bytes left? */
154 blt .Lcksumdata_endgame /* Yup */
155
156 /* Now quad-align, if necessary */
157 ands r7, r0, #0x04 /* r7 = 0 when already 8-byte aligned */
158 ldrne r7, [r0], #0x04 /* Otherwise r7 = the leading word, summed later */
159 subne r1, r1, #0x04
160 subs r1, r1, #0x40 /* Bias length by -64 so the sign tells if a 64-byte chunk remains */
161 blt .Lcksumdata_bigloop_end /* Note: C flag clear if branch taken */
162
163 /*
164 * Buffer is now quad aligned. Sum 64 bytes at a time.
165 * Note: First ldrd is hoisted above the loop, together with
166 * setting r6 to zero to avoid stalling for results in the
167 * loop. (r7 is live, from above).
168 */
169 ldrd r4, [r0], #0x08 /* r4/r5 = first two words of the chunk */
170 mov r6, #0x00
171 .Lcksumdata_bigloop:
172 pld [r0, #0x18]
173 adds r2, r2, r6 /* r6/r7 carried over from the previous pass (or from the setup above) */
174 adcs r2, r2, r7
175 ldrd r6, [r0], #0x08
176 adcs r2, r2, r4
177 adcs r2, r2, r5
178 ldrd r4, [r0], #0x08
179 adcs r2, r2, r6
180 adcs r2, r2, r7
181 ldrd r6, [r0], #0x08
182 adcs r2, r2, r4
183 adcs r2, r2, r5
184 ldrd r4, [r0], #0x08
185 adcs r2, r2, r6
186 adcs r2, r2, r7
187 pld [r0, #0x18]
188 ldrd r6, [r0], #0x08
189 adcs r2, r2, r4
190 adcs r2, r2, r5
191 ldrd r4, [r0], #0x08
192 adcs r2, r2, r6
193 adcs r2, r2, r7
194 ldrd r6, [r0], #0x08 /* Last two words of the chunk; summed after the loop test */
195 adcs r2, r2, r4
196 adcs r2, r2, r5
197 adc r2, r2, #0x00 /* Fold the final carry back in */
198 subs r1, r1, #0x40 /* Another full 64-byte chunk? */
199 ldrged r4, [r0], #0x08 /* If so, preload its first two words */
200 bge .Lcksumdata_bigloop
201
202 adds r2, r2, r6 /* r6/r7 still need summing */
203 .Lcksumdata_bigloop_end: /* Also reached from the blt above with C clear and r7 = leading word or 0 */
204 adcs r2, r2, r7
205 adc r2, r2, #0x00 /* Fold the final carry back in */
206
207 #else /* !_ARM_ARCH_5E */
208
209 subs r1, r1, #0x40 /* Bias length by -64 so the sign tells if a 64-byte chunk remains */
210 blt .Lcksumdata_bigloop_end
211
212 .Lcksumdata_bigloop: /* Sum 64 bytes (16 words) per pass */
213 ldmia r0!, {r3, r4, r5, r6}
214 adds r2, r2, r3
215 adcs r2, r2, r4
216 adcs r2, r2, r5
217 ldmia r0!, {r3, r4, r5, r7}
218 adcs r2, r2, r6
219 adcs r2, r2, r3
220 adcs r2, r2, r4
221 adcs r2, r2, r5
222 ldmia r0!, {r3, r4, r5, r6}
223 adcs r2, r2, r7
224 adcs r2, r2, r3
225 adcs r2, r2, r4
226 adcs r2, r2, r5
227 ldmia r0!, {r3, r4, r5, r7}
228 adcs r2, r2, r6
229 adcs r2, r2, r3
230 adcs r2, r2, r4
231 adcs r2, r2, r5
232 adcs r2, r2, r7
233 adc r2, r2, #0x00 /* Fold the final carry back in */
234 subs r1, r1, #0x40 /* Another full 64-byte chunk? */
235 bge .Lcksumdata_bigloop
236 .Lcksumdata_bigloop_end:
237 #endif
238
239 adds r1, r1, #0x40 /* Undo the -64 bias: r1 = bytes left (fewer than 64) */
240 RETeq /* Nothing left */
241 cmp r1, #0x20 /* At least 32 bytes left? */
242
243 #ifdef _ARM_ARCH_5E
244 ldrged r4, [r0], #0x08 /* Avoid stalling pld and result */
245 blt .Lcksumdata_less_than_32
246 pld [r0, #0x18]
247 ldrd r6, [r0], #0x08
248 adds r2, r2, r4
249 adcs r2, r2, r5
250 ldrd r4, [r0], #0x08
251 adcs r2, r2, r6
252 adcs r2, r2, r7
253 ldrd r6, [r0], #0x08
254 adcs r2, r2, r4
255 adcs r2, r2, r5
256 adcs r2, r2, r6 /* XXX: Unavoidable result stall */
257 adcs r2, r2, r7
258 #else
259 blt .Lcksumdata_less_than_32
260 ldmia r0!, {r3, r4, r5, r6} /* Sum one 32-byte (8 word) chunk */
261 adds r2, r2, r3
262 adcs r2, r2, r4
263 adcs r2, r2, r5
264 ldmia r0!, {r3, r4, r5, r7}
265 adcs r2, r2, r6
266 adcs r2, r2, r3
267 adcs r2, r2, r4
268 adcs r2, r2, r5
269 adcs r2, r2, r7
270 #endif
271 adc r2, r2, #0x00 /* Fold the final carry back in */
272 subs r1, r1, #0x20 /* Account for the 32 bytes just summed */
273 RETeq /* Nothing left */
274
275 .Lcksumdata_less_than_32:
276 /* There are less than 32 bytes left */
277 and r3, r1, #0x18 /* r3 = bytes covered by whole 8-byte pairs (0, 8, 16 or 24) */
278 rsb r4, r3, #0x18 /* r4 = 24 - r3 = bytes of pairs to skip */
279 sub r1, r1, r3 /* r1 = leftover bytes (0..7) */
280 adds r4, r4, r4, lsr #1 /* Side effect: Clear carry flag */
281 addne pc, pc, r4 /* r4 is now 12 per skipped pair = 3 instructions; jump into the blocks below */
282 nop /* Keeps the first block at the addne's address + 8, which is what pc reads as */
283
284 /*
285 * Note: We use ldm here, even on armv5e, since the combined issue/result
286 * latencies for ldm and ldrd are the same. Using ldm avoids needless #ifdefs.
287 */
288 /* At least 24 bytes remaining... */
289 ldmia r0!, {r4, r5}
290 adcs r2, r2, r4
291 adcs r2, r2, r5
292
293 /* At least 16 bytes remaining... */
294 ldmia r0!, {r4, r5}
295 adcs r2, r2, r4
296 adcs r2, r2, r5
297
298 /* At least 8 bytes remaining... */
299 ldmia r0!, {r4, r5}
300 adcs r2, r2, r4
301 adcs r2, r2, r5
302
303 /* Less than 8 bytes remaining... */
304 adc r2, r2, #0x00 /* Fold the final carry back in */
305 subs r1, r1, #0x04 /* Is there one more whole word? */
306 blt .Lcksumdata_lessthan4
307
308 ldr r4, [r0], #0x04 /* Sum the last whole word */
309 sub r1, r1, #0x04 /* Re-bias so the adds below yields the leftover byte count */
310 adds r2, r2, r4
311 adc r2, r2, #0x00
312
313 /* Deal with < 4 bytes remaining */
314 .Lcksumdata_lessthan4:
315 adds r1, r1, #0x04 /* Undo the -4 bias: r1 = 0..3 bytes left */
316 RETeq /* Nothing left */
317
318 /* Deal with 1 to 3 remaining bytes, possibly misaligned */
319 .Lcksumdata_endgame:
320 ldrb r3, [r0] /* Fetch first byte */
321 cmp r1, #0x02
322 ldrgeb r4, [r0, #0x01] /* Fetch 2nd and 3rd as necessary */
323 movlt r4, #0x00 /* Only 1 byte: no 2nd */
324 ldrgtb r5, [r0, #0x02]
325 movle r5, #0x00 /* 2 bytes or fewer: no 3rd */
326 /* Combine the three bytes depending on endianness and alignment */
327 tst r0, #0x01 /* EQ = even start address, NE = odd start address */
328 #ifdef __ARMEB__
329 orreq r3, r4, r3, lsl #8
330 orreq r3, r3, r5, lsl #24
331 orrne r3, r3, r4, lsl #8
332 orrne r3, r3, r5, lsl #16
333 #else
334 orreq r3, r3, r4, lsl #8
335 orreq r3, r3, r5, lsl #16
336 orrne r3, r4, r3, lsl #8
337 orrne r3, r3, r5, lsl #24
338 #endif
339 adds r2, r2, r3 /* Add the packed tail bytes */
340 adc r2, r2, #0x00 /* Fold the final carry back in */
341 RET
342 END(L_cksumdata)
343
Cache object: 8c14d035f08e4901f6fbc239d57150c3
|