FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 __FBSDID("$FreeBSD$");
91
92 #include "assym.inc"
93
94 .syntax unified
95
96 /*
97 * memset: Sets a block of memory to the specified value
98 *
99 * On entry:
100 * r0 - dest address
101 * r1 - byte to write
102 * r2 - number of bytes to write
103 *
104 * On exit:
105 * r0 - dest address
106 */
107 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
108 ENTRY(memset)
/*
 * r3 = fill byte (low 8 bits of r1); r1 then takes over as the remaining
 * byte count so that r0 — the return value, i.e. the original dest
 * pointer — is never modified. ip is the running store pointer.
 */
109 and r3, r1, #0xff /* We deal with bytes */
110 mov r1, r2
111 do_memset:
112 cmp r1, #0x04 /* Do we have less than 4 bytes */
113 mov ip, r0
114 blt .Lmemset_lessthanfour
115
116 /* Ok first we will word align the address */
117 ands r2, ip, #0x03 /* Get the bottom two bits */
118 bne .Lmemset_wordunaligned /* The address is not word aligned */
119
120 /* We are now word aligned */
121 .Lmemset_wordaligned:
122 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
123 tst ip, #0x04 /* Quad-align for armv5e */
124 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
125 subne r1, r1, #0x04 /* Quad-align if necessary */
126 strne r3, [ip], #0x04
127 cmp r1, #0x10
128 blt .Lmemset_loop4 /* If less than 16 then use words */
129 mov r2, r3 /* Duplicate data */
130 cmp r1, #0x80 /* If < 128 then skip the big loop */
131 blt .Lmemset_loop32
132
/*
 * The strd forms below store the r2:r3 register pair (8 bytes per
 * instruction); r2 was duplicated from r3 above and ip was 8-byte
 * aligned by the quad-align step, as strd requires.
 */
133 /* Do 128 bytes at a time */
134 .Lmemset_loop128:
135 subs r1, r1, #0x80
136 strdge r2, [ip], #0x08
137 strdge r2, [ip], #0x08
138 strdge r2, [ip], #0x08
139 strdge r2, [ip], #0x08
140 strdge r2, [ip], #0x08
141 strdge r2, [ip], #0x08
142 strdge r2, [ip], #0x08
143 strdge r2, [ip], #0x08
144 strdge r2, [ip], #0x08
145 strdge r2, [ip], #0x08
146 strdge r2, [ip], #0x08
147 strdge r2, [ip], #0x08
148 strdge r2, [ip], #0x08
149 strdge r2, [ip], #0x08
150 strdge r2, [ip], #0x08
151 strdge r2, [ip], #0x08
152 bgt .Lmemset_loop128
153 RETeq /* Zero length so just exit */
154
/* Falls through here only with r1 negative (went below 0x80). */
155 add r1, r1, #0x80 /* Adjust for extra sub */
156
157 /* Do 32 bytes at a time */
158 .Lmemset_loop32:
159 subs r1, r1, #0x20
160 strdge r2, [ip], #0x08
161 strdge r2, [ip], #0x08
162 strdge r2, [ip], #0x08
163 strdge r2, [ip], #0x08
164 bgt .Lmemset_loop32
165 RETeq /* Zero length so just exit */
166
167 adds r1, r1, #0x10 /* Partially adjust for extra sub */
168
169 /* Deal with 16 bytes or more */
170 strdge r2, [ip], #0x08
171 strdge r2, [ip], #0x08
172 RETeq /* Zero length so just exit */
173
174 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
175
176 /* We have at least 4 bytes so copy as words */
177 .Lmemset_loop4:
178 subs r1, r1, #0x04
179 strge r3, [ip], #0x04
180 bgt .Lmemset_loop4
181 RETeq /* Zero length so just exit */
182
183 /* Compensate for 64-bit alignment check */
184 adds r1, r1, #0x04
185 RETeq
/* 1..3 trailing bytes: flags from the cmp drive the conditional strb's. */
186 cmp r1, #2
187
188 strb r3, [ip], #0x01 /* Set 1 byte */
189 strbge r3, [ip], #0x01 /* Set another byte */
190 strbgt r3, [ip] /* and a third */
191 RET /* Exit */
192
/*
 * Entered with r2 = ip & 3 (nonzero). Store 4 - r2 leading bytes to
 * reach word alignment, then re-test the remaining length.
 */
193 .Lmemset_wordunaligned:
194 rsb r2, r2, #0x004
195 strb r3, [ip], #0x01 /* Set 1 byte */
196 cmp r2, #0x02
197 strbge r3, [ip], #0x01 /* Set another byte */
198 sub r1, r1, r2
199 strbgt r3, [ip], #0x01 /* and a third */
200 cmp r1, #0x04 /* More than 4 bytes left? */
201 bge .Lmemset_wordaligned /* Yup */
202
203 .Lmemset_lessthanfour:
204 cmp r1, #0x00
205 RETeq /* Zero length so exit */
206 strb r3, [ip], #0x01 /* Set 1 byte */
207 cmp r1, #0x02
208 strbge r3, [ip], #0x01 /* Set another byte */
209 strbgt r3, [ip] /* and a third */
210 RET /* Exit */
211 END(memset)
212
/*
 * int memcmp(const void *b1, const void *b2, size_t len)
 * In:  r0 = b1, r1 = b2, r2 = len.
 * Out: r0 = difference of first mismatching bytes/words, 0 if equal.
 * ip shadows b1 so r0 is free to build the return value.
 */
213 ENTRY(memcmp)
214 mov ip, r0
215 cmp r2, #0x06
216 beq .Lmemcmp_6bytes
217 mov r0, #0x00
218
219 /* Are both addresses aligned the same way? */
220 cmp r2, #0x00
221 eorsne r3, ip, r1
222 RETeq /* len == 0, or same addresses! */
223 tst r3, #0x03
224 subne r2, r2, #0x01
225 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
226
/*
 * Computed goto: r3 = 3 * ((r1 - 5) & 3) (== 3 * ((r1 - 1) & 3)), then
 * pc advances by r3 * 8 = 24 bytes per unit, i.e. one 6-instruction
 * stanza per unit of misalignment. Each "Compare up to N bytes" stanza
 * below MUST therefore stay exactly 6 instructions long, and the nop
 * accounts for pc reading as current + 8 in ARM state.
 */
227 /* Word-align the addresses, if necessary */
228 sub r3, r1, #0x05
229 ands r3, r3, #0x03
230 add r3, r3, r3, lsl #1
231 addne pc, pc, r3, lsl #3
232 nop
233
234 /* Compare up to 3 bytes */
235 ldrb r0, [ip], #0x01
236 ldrb r3, [r1], #0x01
237 subs r0, r0, r3
238 RETne
239 subs r2, r2, #0x01
240 RETeq
241
242 /* Compare up to 2 bytes */
243 ldrb r0, [ip], #0x01
244 ldrb r3, [r1], #0x01
245 subs r0, r0, r3
246 RETne
247 subs r2, r2, #0x01
248 RETeq
249
250 /* Compare 1 byte */
251 ldrb r0, [ip], #0x01
252 ldrb r3, [r1], #0x01
253 subs r0, r0, r3
254 RETne
255 subs r2, r2, #0x01
256 RETeq
257
258 /* Compare 4 bytes at a time, if possible */
259 subs r2, r2, #0x04
260 bcc .Lmemcmp_bytewise
261 .Lmemcmp_word_aligned:
262 ldr r0, [ip], #0x04
263 ldr r3, [r1], #0x04
264 subs r2, r2, #0x04
265 cmpcs r0, r3
266 beq .Lmemcmp_word_aligned
267 sub r0, r0, r3
268
269 /* Correct for extra subtraction, and check if done */
270 adds r2, r2, #0x04
271 cmpeq r0, #0x00 /* If done, did all bytes match? */
272 RETeq /* Yup. Just return */
273
/*
 * A mismatching word was found: back both pointers up and recompare it
 * byte-by-byte so the return value reflects the first differing byte.
 */
274 /* Re-do the final word byte-wise */
275 sub ip, ip, #0x04
276 sub r1, r1, #0x04
277
278 .Lmemcmp_bytewise:
279 add r2, r2, #0x03
280 .Lmemcmp_bytewise2:
281 ldrb r0, [ip], #0x01
282 ldrb r3, [r1], #0x01
283 subs r2, r2, #0x01
284 cmpcs r0, r3
285 beq .Lmemcmp_bytewise2
286 sub r0, r0, r3
287 RET
288
289 /*
290 * 6 byte compares are very common, thanks to the network stack.
291 * This code is hand-scheduled to reduce the number of stalls for
292 * load results. Everything else being equal, this will be ~32%
293 * faster than a byte-wise memcmp.
294 */
295 .align 5
296 .Lmemcmp_6bytes:
297 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
298 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
299 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
300 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
301 ldrbeq r3, [ip, #0x01] /* r3 = b1#1 */
302 RETne /* Return if mismatch on #0 */
303 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
304 ldrbeq r3, [r1, #0x02] /* r3 = b2#2 */
305 ldrbeq r0, [ip, #0x02] /* r0 = b1#2 */
306 RETne /* Return if mismatch on #1 */
307 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
308 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
309 ldrbeq r3, [ip, #0x03] /* r3 = b1#3 */
310 RETne /* Return if mismatch on #2 */
311 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
312 ldrbeq r3, [r1, #0x04] /* r3 = b2#4 */
313 ldrbeq r0, [ip, #0x04] /* r0 = b1#4 */
314 RETne /* Return if mismatch on #3 */
315 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
316 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
317 ldrbeq r3, [ip, #0x05] /* r3 = b1#5 */
318 RETne /* Return if mismatch on #4 */
319 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
320 RET
321 END(memcmp)
322
/*
 * void *memmove(void *dst, const void *src, size_t len)
 * In:  r0 = dst, r1 = src, r2 = len.  Out: r0 = dst.
 * Non-overlapping buffers are handed off (tail-called) to memcpy.
 * Overlapping buffers copy forwards when src > dst, backwards when
 * dst > src, so the not-yet-copied region is never overwritten.
 */
323 ENTRY(memmove)
324 /* Do the buffers overlap? */
325 cmp r0, r1
326 RETeq /* Bail now if src/dst are the same */
327 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
328 subcs r3, r1, r0 /* if (src > dst) r3 = src - dst */
329 cmp r3, r2 /* if (r3 < len) we have an overlap */
330 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
331
332 /* Determine copy direction */
333 cmp r1, r0
334 bcc .Lmemmove_backwards
335
/*
 * NOTE(review): EQ here would mean src == dst, but that case already
 * returned at the top of the function — this pair looks vestigial
 * (and the "len=0" comment does not match the flags); confirm against
 * upstream history before removing.
 */
336 moveq r0, #0 /* Quick abort for len=0 */
337 RETeq
338
/* Forward copy. r0/lr are saved so the final ldmia can both restore
 * the return value (original dst) and return in one instruction. */
339 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
340 subs r2, r2, #4
341 blt .Lmemmove_fl4 /* less than 4 bytes */
342 ands r12, r0, #3
343 bne .Lmemmove_fdestul /* oh unaligned destination addr */
344 ands r12, r1, #3
345 bne .Lmemmove_fsrcul /* oh unaligned source addr */
346
347 .Lmemmove_ft8:
348 /* We have aligned source and destination */
349 subs r2, r2, #8
350 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
351 subs r2, r2, #0x14
352 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
353 stmdb sp!, {r4} /* borrow r4 */
354
355 /* blat 32 bytes at a time */
356 /* XXX for really big copies perhaps we should use more registers */
357 .Lmemmove_floop32:
358 ldmia r1!, {r3, r4, r12, lr}
359 stmia r0!, {r3, r4, r12, lr}
360 ldmia r1!, {r3, r4, r12, lr}
361 stmia r0!, {r3, r4, r12, lr}
362 subs r2, r2, #0x20
363 bge .Lmemmove_floop32
364
365 cmn r2, #0x10
366 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
367 stmiage r0!, {r3, r4, r12, lr}
368 subge r2, r2, #0x10
369 ldmia sp!, {r4} /* return r4 */
370
371 .Lmemmove_fl32:
372 adds r2, r2, #0x14
373
374 /* blat 12 bytes at a time */
375 .Lmemmove_floop12:
376 ldmiage r1!, {r3, r12, lr}
377 stmiage r0!, {r3, r12, lr}
378 subsge r2, r2, #0x0c
379 bge .Lmemmove_floop12
380
381 .Lmemmove_fl12:
382 adds r2, r2, #8
383 blt .Lmemmove_fl4
384
385 subs r2, r2, #4
386 ldrlt r3, [r1], #4
387 strlt r3, [r0], #4
388 ldmiage r1!, {r3, r12}
389 stmiage r0!, {r3, r12}
390 subge r2, r2, #4
391
392 .Lmemmove_fl4:
393 /* less than 4 bytes to go */
394 adds r2, r2, #4
395 ldmiaeq sp!, {r0, pc} /* done */
396
397 /* copy the crud byte at a time */
398 cmp r2, #2
399 ldrb r3, [r1], #1
400 strb r3, [r0], #1
401 ldrbge r3, [r1], #1
402 strbge r3, [r0], #1
403 ldrbgt r3, [r1], #1
404 strbgt r3, [r0], #1
405 ldmia sp!, {r0, pc}
406
407 /* erg - unaligned destination */
408 .Lmemmove_fdestul:
/* r12 = dst & 3 here; copy 4 - r12 bytes to word-align dst. */
409 rsb r12, r12, #4
410 cmp r12, #2
411
412 /* align destination with byte copies */
413 ldrb r3, [r1], #1
414 strb r3, [r0], #1
415 ldrbge r3, [r1], #1
416 strbge r3, [r0], #1
417 ldrbgt r3, [r1], #1
418 strbgt r3, [r0], #1
419 subs r2, r2, r12
420 blt .Lmemmove_fl4 /* less the 4 bytes */
421
422 ands r12, r1, #3
423 beq .Lmemmove_ft8 /* we have an aligned source */
424
/*
 * Unaligned forward source: round src down to a word boundary, keep the
 * previously loaded word in lr, and merge consecutive words with
 * lsr/lsl shift pairs. The shift amounts (8/24, 16/16, 24/8) are
 * selected by the source misalignment (1, 2 or 3 bytes) in r12.
 */
425 /* erg - unaligned source */
426 /* This is where it gets nasty ... */
427 .Lmemmove_fsrcul:
428 bic r1, r1, #3
429 ldr lr, [r1], #4
430 cmp r12, #2
431 bgt .Lmemmove_fsrcul3
432 beq .Lmemmove_fsrcul2
433 cmp r2, #0x0c
434 blt .Lmemmove_fsrcul1loop4
435 sub r2, r2, #0x0c
436 stmdb sp!, {r4, r5}
437
438 .Lmemmove_fsrcul1loop16:
439 mov r3, lr, lsr #8
440 ldmia r1!, {r4, r5, r12, lr}
441 orr r3, r3, r4, lsl #24
442 mov r4, r4, lsr #8
443 orr r4, r4, r5, lsl #24
444 mov r5, r5, lsr #8
445 orr r5, r5, r12, lsl #24
446 mov r12, r12, lsr #8
447 orr r12, r12, lr, lsl #24
448 stmia r0!, {r3-r5, r12}
449 subs r2, r2, #0x10
450 bge .Lmemmove_fsrcul1loop16
451 ldmia sp!, {r4, r5}
452 adds r2, r2, #0x0c
453 blt .Lmemmove_fsrcul1l4
454
455 .Lmemmove_fsrcul1loop4:
456 mov r12, lr, lsr #8
457 ldr lr, [r1], #4
458 orr r12, r12, lr, lsl #24
459 str r12, [r0], #4
460 subs r2, r2, #4
461 bge .Lmemmove_fsrcul1loop4
462
463 .Lmemmove_fsrcul1l4:
/* Undo the round-down so r1 again points at the true source byte. */
464 sub r1, r1, #3
465 b .Lmemmove_fl4
466
467 .Lmemmove_fsrcul2:
468 cmp r2, #0x0c
469 blt .Lmemmove_fsrcul2loop4
470 sub r2, r2, #0x0c
471 stmdb sp!, {r4, r5}
472
473 .Lmemmove_fsrcul2loop16:
474 mov r3, lr, lsr #16
475 ldmia r1!, {r4, r5, r12, lr}
476 orr r3, r3, r4, lsl #16
477 mov r4, r4, lsr #16
478 orr r4, r4, r5, lsl #16
479 mov r5, r5, lsr #16
480 orr r5, r5, r12, lsl #16
481 mov r12, r12, lsr #16
482 orr r12, r12, lr, lsl #16
483 stmia r0!, {r3-r5, r12}
484 subs r2, r2, #0x10
485 bge .Lmemmove_fsrcul2loop16
486 ldmia sp!, {r4, r5}
487 adds r2, r2, #0x0c
488 blt .Lmemmove_fsrcul2l4
489
490 .Lmemmove_fsrcul2loop4:
491 mov r12, lr, lsr #16
492 ldr lr, [r1], #4
493 orr r12, r12, lr, lsl #16
494 str r12, [r0], #4
495 subs r2, r2, #4
496 bge .Lmemmove_fsrcul2loop4
497
498 .Lmemmove_fsrcul2l4:
499 sub r1, r1, #2
500 b .Lmemmove_fl4
501
502 .Lmemmove_fsrcul3:
503 cmp r2, #0x0c
504 blt .Lmemmove_fsrcul3loop4
505 sub r2, r2, #0x0c
506 stmdb sp!, {r4, r5}
507
508 .Lmemmove_fsrcul3loop16:
509 mov r3, lr, lsr #24
510 ldmia r1!, {r4, r5, r12, lr}
511 orr r3, r3, r4, lsl #8
512 mov r4, r4, lsr #24
513 orr r4, r4, r5, lsl #8
514 mov r5, r5, lsr #24
515 orr r5, r5, r12, lsl #8
516 mov r12, r12, lsr #24
517 orr r12, r12, lr, lsl #8
518 stmia r0!, {r3-r5, r12}
519 subs r2, r2, #0x10
520 bge .Lmemmove_fsrcul3loop16
521 ldmia sp!, {r4, r5}
522 adds r2, r2, #0x0c
523 blt .Lmemmove_fsrcul3l4
524
525 .Lmemmove_fsrcul3loop4:
526 mov r12, lr, lsr #24
527 ldr lr, [r1], #4
528 orr r12, r12, lr, lsl #8
529 str r12, [r0], #4
530 subs r2, r2, #4
531 bge .Lmemmove_fsrcul3loop4
532
533 .Lmemmove_fsrcul3l4:
534 sub r1, r1, #1
535 b .Lmemmove_fl4
536
/*
 * Backward copy (dst > src): point both registers one past the end and
 * copy downwards with decrement-before addressing. r0 is never
 * clobbered here, so a plain RET suffices on this path.
 */
537 .Lmemmove_backwards:
538 add r1, r1, r2
539 add r0, r0, r2
540 subs r2, r2, #4
541 blt .Lmemmove_bl4 /* less than 4 bytes */
542 ands r12, r0, #3
543 bne .Lmemmove_bdestul /* oh unaligned destination addr */
544 ands r12, r1, #3
545 bne .Lmemmove_bsrcul /* oh unaligned source addr */
546
547 .Lmemmove_bt8:
548 /* We have aligned source and destination */
549 subs r2, r2, #8
550 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
551 stmdb sp!, {r4, lr}
552 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
553 blt .Lmemmove_bl32
554
555 /* blat 32 bytes at a time */
556 /* XXX for really big copies perhaps we should use more registers */
557 .Lmemmove_bloop32:
558 ldmdb r1!, {r3, r4, r12, lr}
559 stmdb r0!, {r3, r4, r12, lr}
560 ldmdb r1!, {r3, r4, r12, lr}
561 stmdb r0!, {r3, r4, r12, lr}
562 subs r2, r2, #0x20
563 bge .Lmemmove_bloop32
564
565 .Lmemmove_bl32:
566 cmn r2, #0x10
567 ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
568 stmdbge r0!, {r3, r4, r12, lr}
569 subge r2, r2, #0x10
570 adds r2, r2, #0x14
571 ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
572 stmdbge r0!, {r3, r12, lr}
573 subge r2, r2, #0x0c
574 ldmia sp!, {r4, lr}
575
576 .Lmemmove_bl12:
577 adds r2, r2, #8
578 blt .Lmemmove_bl4
579 subs r2, r2, #4
580 ldrlt r3, [r1, #-4]!
581 strlt r3, [r0, #-4]!
582 ldmdbge r1!, {r3, r12}
583 stmdbge r0!, {r3, r12}
584 subge r2, r2, #4
585
586 .Lmemmove_bl4:
587 /* less than 4 bytes to go */
588 adds r2, r2, #4
589 RETeq /* done */
590
591 /* copy the crud byte at a time */
592 cmp r2, #2
593 ldrb r3, [r1, #-1]!
594 strb r3, [r0, #-1]!
595 ldrbge r3, [r1, #-1]!
596 strbge r3, [r0, #-1]!
597 ldrbgt r3, [r1, #-1]!
598 strbgt r3, [r0, #-1]!
599 RET
600
601 /* erg - unaligned destination */
602 .Lmemmove_bdestul:
/* r12 = dst & 3: exactly that many byte stores re-align dst downwards. */
603 cmp r12, #2
604
605 /* align destination with byte copies */
606 ldrb r3, [r1, #-1]!
607 strb r3, [r0, #-1]!
608 ldrbge r3, [r1, #-1]!
609 strbge r3, [r0, #-1]!
610 ldrbgt r3, [r1, #-1]!
611 strbgt r3, [r0, #-1]!
612 subs r2, r2, r12
613 blt .Lmemmove_bl4 /* less than 4 bytes to go */
614 ands r12, r1, #3
615 beq .Lmemmove_bt8 /* we have an aligned source */
616
/*
 * Unaligned backward source: mirror image of the forward merging loops —
 * src is rounded down, r3 carries the previously loaded word, and the
 * lsl/lsr shift pairs are swapped relative to the forward versions.
 */
617 /* erg - unaligned source */
618 /* This is where it gets nasty ... */
619 .Lmemmove_bsrcul:
620 bic r1, r1, #3
621 ldr r3, [r1, #0]
622 cmp r12, #2
623 blt .Lmemmove_bsrcul1
624 beq .Lmemmove_bsrcul2
625 cmp r2, #0x0c
626 blt .Lmemmove_bsrcul3loop4
627 sub r2, r2, #0x0c
628 stmdb sp!, {r4, r5, lr}
629
630 .Lmemmove_bsrcul3loop16:
631 mov lr, r3, lsl #8
632 ldmdb r1!, {r3-r5, r12}
633 orr lr, lr, r12, lsr #24
634 mov r12, r12, lsl #8
635 orr r12, r12, r5, lsr #24
636 mov r5, r5, lsl #8
637 orr r5, r5, r4, lsr #24
638 mov r4, r4, lsl #8
639 orr r4, r4, r3, lsr #24
640 stmdb r0!, {r4, r5, r12, lr}
641 subs r2, r2, #0x10
642 bge .Lmemmove_bsrcul3loop16
643 ldmia sp!, {r4, r5, lr}
644 adds r2, r2, #0x0c
645 blt .Lmemmove_bsrcul3l4
646
647 .Lmemmove_bsrcul3loop4:
648 mov r12, r3, lsl #8
649 ldr r3, [r1, #-4]!
650 orr r12, r12, r3, lsr #24
651 str r12, [r0, #-4]!
652 subs r2, r2, #4
653 bge .Lmemmove_bsrcul3loop4
654
655 .Lmemmove_bsrcul3l4:
/* Undo the round-down so r1 again points at the true source byte. */
656 add r1, r1, #3
657 b .Lmemmove_bl4
658
659 .Lmemmove_bsrcul2:
660 cmp r2, #0x0c
661 blt .Lmemmove_bsrcul2loop4
662 sub r2, r2, #0x0c
663 stmdb sp!, {r4, r5, lr}
664
665 .Lmemmove_bsrcul2loop16:
666 mov lr, r3, lsl #16
667 ldmdb r1!, {r3-r5, r12}
668 orr lr, lr, r12, lsr #16
669 mov r12, r12, lsl #16
670 orr r12, r12, r5, lsr #16
671 mov r5, r5, lsl #16
672 orr r5, r5, r4, lsr #16
673 mov r4, r4, lsl #16
674 orr r4, r4, r3, lsr #16
675 stmdb r0!, {r4, r5, r12, lr}
676 subs r2, r2, #0x10
677 bge .Lmemmove_bsrcul2loop16
678 ldmia sp!, {r4, r5, lr}
679 adds r2, r2, #0x0c
680 blt .Lmemmove_bsrcul2l4
681
682 .Lmemmove_bsrcul2loop4:
683 mov r12, r3, lsl #16
684 ldr r3, [r1, #-4]!
685 orr r12, r12, r3, lsr #16
686 str r12, [r0, #-4]!
687 subs r2, r2, #4
688 bge .Lmemmove_bsrcul2loop4
689
690 .Lmemmove_bsrcul2l4:
691 add r1, r1, #2
692 b .Lmemmove_bl4
693
694 .Lmemmove_bsrcul1:
695 cmp r2, #0x0c
696 blt .Lmemmove_bsrcul1loop4
697 sub r2, r2, #0x0c
698 stmdb sp!, {r4, r5, lr}
699
700 .Lmemmove_bsrcul1loop32:
701 mov lr, r3, lsl #24
702 ldmdb r1!, {r3-r5, r12}
703 orr lr, lr, r12, lsr #8
704 mov r12, r12, lsl #24
705 orr r12, r12, r5, lsr #8
706 mov r5, r5, lsl #24
707 orr r5, r5, r4, lsr #8
708 mov r4, r4, lsl #24
709 orr r4, r4, r3, lsr #8
710 stmdb r0!, {r4, r5, r12, lr}
711 subs r2, r2, #0x10
712 bge .Lmemmove_bsrcul1loop32
713 ldmia sp!, {r4, r5, lr}
714 adds r2, r2, #0x0c
715 blt .Lmemmove_bsrcul1l4
716
717 .Lmemmove_bsrcul1loop4:
718 mov r12, r3, lsl #24
719 ldr r3, [r1, #-4]!
720 orr r12, r12, r3, lsr #8
721 str r12, [r0, #-4]!
722 subs r2, r2, #4
723 bge .Lmemmove_bsrcul1loop4
724
725 .Lmemmove_bsrcul1l4:
726 add r1, r1, #1
727 b .Lmemmove_bl4
728 END(memmove)
729
730 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
731 ENTRY(memcpy)
732 pld [r1]
733 cmp r2, #0x0c
734 ble .Lmemcpy_short /* <= 12 bytes */
735 #ifdef FLASHADDR
736 #if FLASHADDR > PHYSADDR
737 ldr r3, =FLASHADDR
738 cmp r3, pc
739 bls .Lnormal
740 #else
741 ldr r3, =FLASHADDR
742 cmp r3, pc
743 bhi .Lnormal
744 #endif
745 #endif
746 mov r3, r0 /* We must not clobber r0 */
747
748 /* Word-align the destination buffer */
749 ands ip, r3, #0x03 /* Already word aligned? */
750 beq .Lmemcpy_wordaligned /* Yup */
751 cmp ip, #0x02
752 ldrb ip, [r1], #0x01
753 sub r2, r2, #0x01
754 strb ip, [r3], #0x01
755 ldrble ip, [r1], #0x01
756 suble r2, r2, #0x01
757 strble ip, [r3], #0x01
758 ldrblt ip, [r1], #0x01
759 sublt r2, r2, #0x01
760 strblt ip, [r3], #0x01
761
762 /* Destination buffer is now word aligned */
763 .Lmemcpy_wordaligned:
764 ands ip, r1, #0x03 /* Is src also word-aligned? */
765 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
766
767 /* Quad-align the destination buffer */
768 tst r3, #0x07 /* Already quad aligned? */
769 ldrne ip, [r1], #0x04
770 stmfd sp!, {r4-r9} /* Free up some registers */
771 subne r2, r2, #0x04
772 strne ip, [r3], #0x04
773
774 /* Destination buffer quad aligned, source is at least word aligned */
775 subs r2, r2, #0x80
776 blt .Lmemcpy_w_lessthan128
777
778 /* Copy 128 bytes at a time */
779 .Lmemcpy_w_loop128:
780 ldr r4, [r1], #0x04 /* LD:00-03 */
781 ldr r5, [r1], #0x04 /* LD:04-07 */
782 pld [r1, #0x18] /* Prefetch 0x20 */
783 ldr r6, [r1], #0x04 /* LD:08-0b */
784 ldr r7, [r1], #0x04 /* LD:0c-0f */
785 ldr r8, [r1], #0x04 /* LD:10-13 */
786 ldr r9, [r1], #0x04 /* LD:14-17 */
787 strd r4, [r3], #0x08 /* ST:00-07 */
788 ldr r4, [r1], #0x04 /* LD:18-1b */
789 ldr r5, [r1], #0x04 /* LD:1c-1f */
790 strd r6, [r3], #0x08 /* ST:08-0f */
791 ldr r6, [r1], #0x04 /* LD:20-23 */
792 ldr r7, [r1], #0x04 /* LD:24-27 */
793 pld [r1, #0x18] /* Prefetch 0x40 */
794 strd r8, [r3], #0x08 /* ST:10-17 */
795 ldr r8, [r1], #0x04 /* LD:28-2b */
796 ldr r9, [r1], #0x04 /* LD:2c-2f */
797 strd r4, [r3], #0x08 /* ST:18-1f */
798 ldr r4, [r1], #0x04 /* LD:30-33 */
799 ldr r5, [r1], #0x04 /* LD:34-37 */
800 strd r6, [r3], #0x08 /* ST:20-27 */
801 ldr r6, [r1], #0x04 /* LD:38-3b */
802 ldr r7, [r1], #0x04 /* LD:3c-3f */
803 strd r8, [r3], #0x08 /* ST:28-2f */
804 ldr r8, [r1], #0x04 /* LD:40-43 */
805 ldr r9, [r1], #0x04 /* LD:44-47 */
806 pld [r1, #0x18] /* Prefetch 0x60 */
807 strd r4, [r3], #0x08 /* ST:30-37 */
808 ldr r4, [r1], #0x04 /* LD:48-4b */
809 ldr r5, [r1], #0x04 /* LD:4c-4f */
810 strd r6, [r3], #0x08 /* ST:38-3f */
811 ldr r6, [r1], #0x04 /* LD:50-53 */
812 ldr r7, [r1], #0x04 /* LD:54-57 */
813 strd r8, [r3], #0x08 /* ST:40-47 */
814 ldr r8, [r1], #0x04 /* LD:58-5b */
815 ldr r9, [r1], #0x04 /* LD:5c-5f */
816 strd r4, [r3], #0x08 /* ST:48-4f */
817 ldr r4, [r1], #0x04 /* LD:60-63 */
818 ldr r5, [r1], #0x04 /* LD:64-67 */
819 pld [r1, #0x18] /* Prefetch 0x80 */
820 strd r6, [r3], #0x08 /* ST:50-57 */
821 ldr r6, [r1], #0x04 /* LD:68-6b */
822 ldr r7, [r1], #0x04 /* LD:6c-6f */
823 strd r8, [r3], #0x08 /* ST:58-5f */
824 ldr r8, [r1], #0x04 /* LD:70-73 */
825 ldr r9, [r1], #0x04 /* LD:74-77 */
826 strd r4, [r3], #0x08 /* ST:60-67 */
827 ldr r4, [r1], #0x04 /* LD:78-7b */
828 ldr r5, [r1], #0x04 /* LD:7c-7f */
829 strd r6, [r3], #0x08 /* ST:68-6f */
830 strd r8, [r3], #0x08 /* ST:70-77 */
831 subs r2, r2, #0x80
832 strd r4, [r3], #0x08 /* ST:78-7f */
833 bge .Lmemcpy_w_loop128
834
835 .Lmemcpy_w_lessthan128:
836 adds r2, r2, #0x80 /* Adjust for extra sub */
837 ldmfdeq sp!, {r4-r9}
838 RETeq /* Return now if done */
839 subs r2, r2, #0x20
840 blt .Lmemcpy_w_lessthan32
841
842 /* Copy 32 bytes at a time */
843 .Lmemcpy_w_loop32:
844 ldr r4, [r1], #0x04
845 ldr r5, [r1], #0x04
846 pld [r1, #0x18]
847 ldr r6, [r1], #0x04
848 ldr r7, [r1], #0x04
849 ldr r8, [r1], #0x04
850 ldr r9, [r1], #0x04
851 strd r4, [r3], #0x08
852 ldr r4, [r1], #0x04
853 ldr r5, [r1], #0x04
854 strd r6, [r3], #0x08
855 strd r8, [r3], #0x08
856 subs r2, r2, #0x20
857 strd r4, [r3], #0x08
858 bge .Lmemcpy_w_loop32
859
860 .Lmemcpy_w_lessthan32:
861 adds r2, r2, #0x20 /* Adjust for extra sub */
862 ldmfdeq sp!, {r4-r9}
863 RETeq /* Return now if done */
864
865 and r4, r2, #0x18
866 rsbs r4, r4, #0x18
867 addne pc, pc, r4, lsl #1
868 nop
869
870 /* At least 24 bytes remaining */
871 ldr r4, [r1], #0x04
872 ldr r5, [r1], #0x04
873 sub r2, r2, #0x08
874 strd r4, [r3], #0x08
875
876 /* At least 16 bytes remaining */
877 ldr r4, [r1], #0x04
878 ldr r5, [r1], #0x04
879 sub r2, r2, #0x08
880 strd r4, [r3], #0x08
881
882 /* At least 8 bytes remaining */
883 ldr r4, [r1], #0x04
884 ldr r5, [r1], #0x04
885 subs r2, r2, #0x08
886 strd r4, [r3], #0x08
887
888 /* Less than 8 bytes remaining */
889 ldmfd sp!, {r4-r9}
890 RETeq /* Return now if done */
891 subs r2, r2, #0x04
892 ldrge ip, [r1], #0x04
893 strge ip, [r3], #0x04
894 RETeq /* Return now if done */
895 addlt r2, r2, #0x04
896 ldrb ip, [r1], #0x01
897 cmp r2, #0x02
898 ldrbge r2, [r1], #0x01
899 strb ip, [r3], #0x01
900 ldrbgt ip, [r1]
901 strbge r2, [r3], #0x01
902 strbgt ip, [r3]
903 RET
904 /* Place a literal pool here for the above ldr instructions to use */
905 .ltorg
906
907
908 /*
909 * At this point, it has not been possible to word align both buffers.
910 * The destination buffer is word aligned, but the source buffer is not.
911 */
912 .Lmemcpy_bad_align:
913 stmfd sp!, {r4-r7}
914 bic r1, r1, #0x03
915 cmp ip, #2
916 ldr ip, [r1], #0x04
917 bgt .Lmemcpy_bad3
918 beq .Lmemcpy_bad2
919 b .Lmemcpy_bad1
920
921 .Lmemcpy_bad1_loop16:
922 mov r4, ip, lsr #8
923 ldr r5, [r1], #0x04
924 pld [r1, #0x018]
925 ldr r6, [r1], #0x04
926 ldr r7, [r1], #0x04
927 ldr ip, [r1], #0x04
928 orr r4, r4, r5, lsl #24
929 mov r5, r5, lsr #8
930 orr r5, r5, r6, lsl #24
931 mov r6, r6, lsr #8
932 orr r6, r6, r7, lsl #24
933 mov r7, r7, lsr #8
934 orr r7, r7, ip, lsl #24
935 str r4, [r3], #0x04
936 str r5, [r3], #0x04
937 str r6, [r3], #0x04
938 str r7, [r3], #0x04
939 .Lmemcpy_bad1:
940 subs r2, r2, #0x10
941 bge .Lmemcpy_bad1_loop16
942
943 adds r2, r2, #0x10
944 ldmfdeq sp!, {r4-r7}
945 RETeq /* Return now if done */
946 subs r2, r2, #0x04
947 sublt r1, r1, #0x03
948 blt .Lmemcpy_bad_done
949
950 .Lmemcpy_bad1_loop4:
951 mov r4, ip, lsr #8
952 ldr ip, [r1], #0x04
953 subs r2, r2, #0x04
954 orr r4, r4, ip, lsl #24
955 str r4, [r3], #0x04
956 bge .Lmemcpy_bad1_loop4
957 sub r1, r1, #0x03
958 b .Lmemcpy_bad_done
959
960 .Lmemcpy_bad2_loop16:
961 mov r4, ip, lsr #16
962 ldr r5, [r1], #0x04
963 pld [r1, #0x018]
964 ldr r6, [r1], #0x04
965 ldr r7, [r1], #0x04
966 ldr ip, [r1], #0x04
967 orr r4, r4, r5, lsl #16
968 mov r5, r5, lsr #16
969 orr r5, r5, r6, lsl #16
970 mov r6, r6, lsr #16
971 orr r6, r6, r7, lsl #16
972 mov r7, r7, lsr #16
973 orr r7, r7, ip, lsl #16
974 str r4, [r3], #0x04
975 str r5, [r3], #0x04
976 str r6, [r3], #0x04
977 str r7, [r3], #0x04
978 .Lmemcpy_bad2:
979 subs r2, r2, #0x10
980 bge .Lmemcpy_bad2_loop16
981
982 adds r2, r2, #0x10
983 ldmfdeq sp!, {r4-r7}
984 RETeq /* Return now if done */
985 subs r2, r2, #0x04
986 sublt r1, r1, #0x02
987 blt .Lmemcpy_bad_done
988
989 .Lmemcpy_bad2_loop4:
990 mov r4, ip, lsr #16
991 ldr ip, [r1], #0x04
992 subs r2, r2, #0x04
993 orr r4, r4, ip, lsl #16
994 str r4, [r3], #0x04
995 bge .Lmemcpy_bad2_loop4
996 sub r1, r1, #0x02
997 b .Lmemcpy_bad_done
998
999 .Lmemcpy_bad3_loop16:
1000 mov r4, ip, lsr #24
1001 ldr r5, [r1], #0x04
1002 pld [r1, #0x018]
1003 ldr r6, [r1], #0x04
1004 ldr r7, [r1], #0x04
1005 ldr ip, [r1], #0x04
1006 orr r4, r4, r5, lsl #8
1007 mov r5, r5, lsr #24
1008 orr r5, r5, r6, lsl #8
1009 mov r6, r6, lsr #24
1010 orr r6, r6, r7, lsl #8
1011 mov r7, r7, lsr #24
1012 orr r7, r7, ip, lsl #8
1013 str r4, [r3], #0x04
1014 str r5, [r3], #0x04
1015 str r6, [r3], #0x04
1016 str r7, [r3], #0x04
1017 .Lmemcpy_bad3:
1018 subs r2, r2, #0x10
1019 bge .Lmemcpy_bad3_loop16
1020
1021 adds r2, r2, #0x10
1022 ldmfdeq sp!, {r4-r7}
1023 RETeq /* Return now if done */
1024 subs r2, r2, #0x04
1025 sublt r1, r1, #0x01
1026 blt .Lmemcpy_bad_done
1027
1028 .Lmemcpy_bad3_loop4:
1029 mov r4, ip, lsr #24
1030 ldr ip, [r1], #0x04
1031 subs r2, r2, #0x04
1032 orr r4, r4, ip, lsl #8
1033 str r4, [r3], #0x04
1034 bge .Lmemcpy_bad3_loop4
1035 sub r1, r1, #0x01
1036
1037 .Lmemcpy_bad_done:
1038 ldmfd sp!, {r4-r7}
1039 adds r2, r2, #0x04
1040 RETeq
1041 ldrb ip, [r1], #0x01
1042 cmp r2, #0x02
1043 ldrbge r2, [r1], #0x01
1044 strb ip, [r3], #0x01
1045 ldrbgt ip, [r1]
1046 strbge r2, [r3], #0x01
1047 strbgt ip, [r3]
1048 RET
1049
1050
1051 /*
1052 * Handle short copies (less than 16 bytes), possibly misaligned.
1053 * Some of these are *very* common, thanks to the network stack,
1054 * and so are handled specially.
1055 */
1056 .Lmemcpy_short:
1057 add pc, pc, r2, lsl #2
1058 nop
1059 RET /* 0x00 */
1060 b .Lmemcpy_bytewise /* 0x01 */
1061 b .Lmemcpy_bytewise /* 0x02 */
1062 b .Lmemcpy_bytewise /* 0x03 */
1063 b .Lmemcpy_4 /* 0x04 */
1064 b .Lmemcpy_bytewise /* 0x05 */
1065 b .Lmemcpy_6 /* 0x06 */
1066 b .Lmemcpy_bytewise /* 0x07 */
1067 b .Lmemcpy_8 /* 0x08 */
1068 b .Lmemcpy_bytewise /* 0x09 */
1069 b .Lmemcpy_bytewise /* 0x0a */
1070 b .Lmemcpy_bytewise /* 0x0b */
1071 b .Lmemcpy_c /* 0x0c */
1072 .Lmemcpy_bytewise:
1073 mov r3, r0 /* We must not clobber r0 */
1074 ldrb ip, [r1], #0x01
1075 1: subs r2, r2, #0x01
1076 strb ip, [r3], #0x01
1077 ldrbne ip, [r1], #0x01
1078 bne 1b
1079 RET
1080
1081 /******************************************************************************
1082 * Special case for 4 byte copies
1083 */
1084 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1085 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1086 LMEMCPY_4_PAD
1087 .Lmemcpy_4:
/*
 * Build a 4-bit case index in r2: bits [3:2] = dst (r0) & 3,
 * bits [1:0] = src (r1) & 3.  r3 = pc - 0x14 resolves to the
 * address of .Lmemcpy_4 itself (pc reads as this insn + 8, and
 * the sub is the fourth instruction), and every case below is
 * padded to a 64-byte slot, so case i lives at
 * .Lmemcpy_4 + (i << LMEMCPY_4_LOG2).  Case 0000 falls through.
 *
 * In the byte-layout comments below, digits name source bytes
 * (0 = first byte); "3210" means bytes packed little-endian with
 * byte 0 in the register's least-significant byte, and 'x'/'.'
 * mark don't-care positions.
 */
1088 and r2, r1, #0x03
1089 orr r2, r2, r0, lsl #2
1090 ands r2, r2, #0x0f
1091 sub r3, pc, #0x14
1092 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1093
1094 /*
1095 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1096 */
1097 ldr r2, [r1]
1098 str r2, [r0]
1099 RET
1100 LMEMCPY_4_PAD
1101
1102 /*
1103 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1104 */
1105 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1106 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1107 mov r3, r3, lsr #8 /* r3 = .210 */
1108 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1109 str r3, [r0]
1110 RET
1111 LMEMCPY_4_PAD
1112
1113 /*
1114 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1115 */
1116 ldrh r3, [r1, #0x02]
1117 ldrh r2, [r1]
1118 orr r3, r2, r3, lsl #16
1119 str r3, [r0]
1120 RET
1121 LMEMCPY_4_PAD
1122
1123 /*
1124 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1125 */
1126 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1127 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1128 mov r3, r3, lsr #24 /* r3 = ...0 */
1129 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1130 str r3, [r0]
1131 RET
1132 LMEMCPY_4_PAD
1133
1134 /*
1135 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1136 */
1137 ldr r2, [r1]
1138 strb r2, [r0]
1139 mov r3, r2, lsr #8
1140 mov r1, r2, lsr #24
1141 strb r1, [r0, #0x03]
1142 strh r3, [r0, #0x01]
1143 RET
1144 LMEMCPY_4_PAD
1145
1146 /*
1147 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1148 */
1149 ldrb r2, [r1]
1150 ldrh r3, [r1, #0x01]
1151 ldrb r1, [r1, #0x03]
1152 strb r2, [r0]
1153 strh r3, [r0, #0x01]
1154 strb r1, [r0, #0x03]
1155 RET
1156 LMEMCPY_4_PAD
1157
1158 /*
1159 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1160 */
1161 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1162 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1163 strb r2, [r0]
1164 mov r2, r2, lsr #8 /* r2 = ...1 */
1165 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1166 mov r3, r3, lsr #8 /* r3 = ...3 */
1167 strh r2, [r0, #0x01]
1168 strb r3, [r0, #0x03]
1169 RET
1170 LMEMCPY_4_PAD
1171
1172 /*
1173 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1174 */
1175 ldrb r2, [r1]
1176 ldrh r3, [r1, #0x01]
1177 ldrb r1, [r1, #0x03]
1178 strb r2, [r0]
1179 strh r3, [r0, #0x01]
1180 strb r1, [r0, #0x03]
1181 RET
1182 LMEMCPY_4_PAD
1183
1184 /*
1185 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1186 */
1187 ldr r2, [r1]
1188 strh r2, [r0]
1189 mov r3, r2, lsr #16
1190 strh r3, [r0, #0x02]
1191 RET
1192 LMEMCPY_4_PAD
1193
1194 /*
1195 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1196 */
1197 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1198 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1199 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1200 strh r1, [r0]
1201 mov r2, r2, lsr #24 /* r2 = ...2 */
1202 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1203 strh r2, [r0, #0x02]
1204 RET
1205 LMEMCPY_4_PAD
1206
1207 /*
1208 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1209 */
1210 ldrh r2, [r1]
1211 ldrh r3, [r1, #0x02]
1212 strh r2, [r0]
1213 strh r3, [r0, #0x02]
1214 RET
1215 LMEMCPY_4_PAD
1216
1217 /*
1218 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1219 */
1220 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1221 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1222 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1223 strh r1, [r0, #0x02]
1224 mov r3, r3, lsl #8 /* r3 = 321. */
1225 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1226 strh r3, [r0]
1227 RET
1228 LMEMCPY_4_PAD
1229
1230 /*
1231 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1232 */
1233 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1234 strb r2, [r0]
1235 mov r3, r2, lsr #8
1236 mov r1, r2, lsr #24
1237 strh r3, [r0, #0x01]
1238 strb r1, [r0, #0x03]
1239 RET
1240 LMEMCPY_4_PAD
1241
1242 /*
1243 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1244 */
1245 ldrb r2, [r1]
1246 ldrh r3, [r1, #0x01]
1247 ldrb r1, [r1, #0x03]
1248 strb r2, [r0]
1249 strh r3, [r0, #0x01]
1250 strb r1, [r0, #0x03]
1251 RET
1252 LMEMCPY_4_PAD
1253
1254 /*
1255 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1256 */
1257 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1258 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1259 strb r2, [r0]
1260 mov r2, r2, lsr #8 /* r2 = ...1 */
1261 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1262 strh r2, [r0, #0x01]
1263 mov r3, r3, lsr #8 /* r3 = ...3 */
1264 strb r3, [r0, #0x03]
1265 RET
1266 LMEMCPY_4_PAD
1267
1268 /*
1269 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1270 */
1271 ldrb r2, [r1]
1272 ldrh r3, [r1, #0x01]
1273 ldrb r1, [r1, #0x03]
1274 strb r2, [r0]
1275 strh r3, [r0, #0x01]
1276 strb r1, [r0, #0x03]
1277 RET
1278 LMEMCPY_4_PAD
1279
1280
1281 /******************************************************************************
1282 * Special case for 6 byte copies
1283 */
1284 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1285 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1286 LMEMCPY_6_PAD
1287 .Lmemcpy_6:
/*
 * Same dispatch scheme as .Lmemcpy_4: r2 = (dst & 3) << 2 | (src & 3),
 * r3 = pc - 0x14 = address of .Lmemcpy_6 (pc reads as this insn + 8),
 * and each alignment case sits in its own 64-byte slot at
 * .Lmemcpy_6 + (case << LMEMCPY_6_LOG2).  Case 0000 falls through.
 * Byte-layout comments use the convention described at .Lmemcpy_4.
 */
1288 and r2, r1, #0x03
1289 orr r2, r2, r0, lsl #2
1290 ands r2, r2, #0x0f
1291 sub r3, pc, #0x14
1292 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1293
1294 /*
1295 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1296 */
1297 ldr r2, [r1]
1298 ldrh r3, [r1, #0x04]
1299 str r2, [r0]
1300 strh r3, [r0, #0x04]
1301 RET
1302 LMEMCPY_6_PAD
1303
1304 /*
1305 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1306 */
1307 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1308 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1309 mov r2, r2, lsr #8 /* r2 = .210 */
1310 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1311 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1312 str r2, [r0]
1313 strh r3, [r0, #0x04]
1314 RET
1315 LMEMCPY_6_PAD
1316
1317 /*
1318 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1319 */
1320 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1321 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1322 mov r1, r3, lsr #16 /* r1 = ..54 */
1323 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1324 str r2, [r0]
1325 strh r1, [r0, #0x04]
1326 RET
1327 LMEMCPY_6_PAD
1328
1329 /*
1330 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1331 */
1332 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1333 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1334 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
1335 mov r2, r2, lsr #24 /* r2 = ...0 */
1336 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1337 mov r1, r1, lsl #8 /* r1 = xx5. */
1338 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1339 str r2, [r0]
1340 strh r1, [r0, #0x04]
1341 RET
1342 LMEMCPY_6_PAD
1343
1344 /*
1345 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1346 */
1347 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1348 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1349 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1350 strh r1, [r0, #0x01]
1351 strb r3, [r0]
1352 mov r3, r3, lsr #24 /* r3 = ...3 */
1353 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1354 mov r2, r2, lsr #8 /* r2 = ...5 */
1355 strh r3, [r0, #0x03]
1356 strb r2, [r0, #0x05]
1357 RET
1358 LMEMCPY_6_PAD
1359
1360 /*
1361 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1362 */
1363 ldrb r2, [r1]
1364 ldrh r3, [r1, #0x01]
1365 ldrh ip, [r1, #0x03]
1366 ldrb r1, [r1, #0x05]
1367 strb r2, [r0]
1368 strh r3, [r0, #0x01]
1369 strh ip, [r0, #0x03]
1370 strb r1, [r0, #0x05]
1371 RET
1372 LMEMCPY_6_PAD
1373
1374 /*
1375 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1376 */
1377 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1378 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1379 strb r2, [r0]
1380 mov r3, r1, lsr #24
1381 strb r3, [r0, #0x05]
1382 mov r3, r1, lsr #8 /* r3 = .543 */
1383 strh r3, [r0, #0x03]
1384 mov r3, r2, lsr #8 /* r3 = ...1 */
1385 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
1386 strh r3, [r0, #0x01]
1387 RET
1388 LMEMCPY_6_PAD
1389
1390 /*
1391 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1392 */
1393 ldrb r2, [r1]
1394 ldrh r3, [r1, #0x01]
1395 ldrh ip, [r1, #0x03]
1396 ldrb r1, [r1, #0x05]
1397 strb r2, [r0]
1398 strh r3, [r0, #0x01]
1399 strh ip, [r0, #0x03]
1400 strb r1, [r0, #0x05]
1401 RET
1402 LMEMCPY_6_PAD
1403
1404 /*
1405 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1406 */
1407 ldrh r2, [r1, #0x04] /* r2 = ..54 */
1408 ldr r3, [r1] /* r3 = 3210 */
1409 mov r2, r2, lsl #16 /* r2 = 54.. */
1410 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
1411 strh r3, [r0]
1412 str r2, [r0, #0x02]
1413 RET
1414 LMEMCPY_6_PAD
1415
1416 /*
1417 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1418 */
1419 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1420 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
1421 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1422 mov r2, r2, lsl #8 /* r2 = 543. */
1423 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
1424 strh r1, [r0]
1425 str r2, [r0, #0x02]
1426 RET
1427 LMEMCPY_6_PAD
1428
1429 /*
1430 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1431 */
1432 ldrh r2, [r1]
1433 ldr r3, [r1, #0x02]
1434 strh r2, [r0]
1435 str r3, [r0, #0x02]
1436 RET
1437 LMEMCPY_6_PAD
1438
1439 /*
1440 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1441 */
1442 ldrb r3, [r1] /* r3 = ...0 */
1443 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1444 ldrb r1, [r1, #0x05] /* r1 = ...5 */
1445 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1446 mov r1, r1, lsl #24 /* r1 = 5... */
1447 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
1448 strh r3, [r0]
1449 str r1, [r0, #0x02]
1450 RET
1451 LMEMCPY_6_PAD
1452
1453 /*
1454 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1455 */
1456 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1457 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
1458 strb r2, [r0]
1459 mov r2, r2, lsr #8 /* r2 = .321 */
1460 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
1461 mov r1, r1, lsr #8 /* r1 = ...5 */
1462 str r2, [r0, #0x01]
1463 strb r1, [r0, #0x05]
1464 RET
1465 LMEMCPY_6_PAD
1466
1467 /*
1468 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1469 */
1470 ldrb r2, [r1]
1471 ldrh r3, [r1, #0x01]
1472 ldrh ip, [r1, #0x03]
1473 ldrb r1, [r1, #0x05]
1474 strb r2, [r0]
1475 strh r3, [r0, #0x01]
1476 strh ip, [r0, #0x03]
1477 strb r1, [r0, #0x05]
1478 RET
1479 LMEMCPY_6_PAD
1480
1481 /*
1482 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1483 */
1484 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1485 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1486 strb r2, [r0]
1487 mov r2, r2, lsr #8 /* r2 = ...1 */
1488 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
1489 mov r1, r1, lsr #24 /* r1 = ...5 */
1490 str r2, [r0, #0x01]
1491 strb r1, [r0, #0x05]
1492 RET
1493 LMEMCPY_6_PAD
1494
1495 /*
1496 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1497 */
1498 ldrb r2, [r1]
1499 ldr r3, [r1, #0x01]
1500 ldrb r1, [r1, #0x05]
1501 strb r2, [r0]
1502 str r3, [r0, #0x01]
1503 strb r1, [r0, #0x05]
1504 RET
1505 LMEMCPY_6_PAD
1506
1507
1508 /******************************************************************************
1509 * Special case for 8 byte copies
1510 */
1511 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
1512 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
1513 LMEMCPY_8_PAD
1514 .Lmemcpy_8:
/*
 * Same dispatch scheme as .Lmemcpy_4: r2 = (dst & 3) << 2 | (src & 3),
 * r3 = pc - 0x14 = address of .Lmemcpy_8 (pc reads as this insn + 8),
 * and each alignment case sits in its own 64-byte slot at
 * .Lmemcpy_8 + (case << LMEMCPY_8_LOG2).  Case 0000 falls through.
 * Byte-layout comments use the convention described at .Lmemcpy_4.
 */
1515 and r2, r1, #0x03
1516 orr r2, r2, r0, lsl #2
1517 ands r2, r2, #0x0f
1518 sub r3, pc, #0x14
1519 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
1520
1521 /*
1522 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1523 */
1524 ldr r2, [r1]
1525 ldr r3, [r1, #0x04]
1526 str r2, [r0]
1527 str r3, [r0, #0x04]
1528 RET
1529 LMEMCPY_8_PAD
1530
1531 /*
1532 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1533 */
1534 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1535 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
1536 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1537 mov r3, r3, lsr #8 /* r3 = .210 */
1538 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1539 mov r1, r1, lsl #24 /* r1 = 7... */
1540 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
1541 str r3, [r0]
1542 str r2, [r0, #0x04]
1543 RET
1544 LMEMCPY_8_PAD
1545
1546 /*
1547 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1548 */
1549 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1550 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1551 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1552 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1553 mov r3, r3, lsr #16 /* r3 = ..54 */
1554 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
1555 str r2, [r0]
1556 str r3, [r0, #0x04]
1557 RET
1558 LMEMCPY_8_PAD
1559
1560 /*
1561 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1562 */
1563 ldrb r3, [r1] /* r3 = ...0 */
1564 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1565 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
1566 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1567 mov r2, r2, lsr #24 /* r2 = ...4 */
1568 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
1569 str r3, [r0]
1570 str r2, [r0, #0x04]
1571 RET
1572 LMEMCPY_8_PAD
1573
1574 /*
1575 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1576 */
1577 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1578 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
1579 strb r3, [r0]
1580 mov r1, r2, lsr #24 /* r1 = ...7 */
1581 strb r1, [r0, #0x07]
1582 mov r1, r3, lsr #8 /* r1 = .321 */
1583 mov r3, r3, lsr #24 /* r3 = ...3 */
1584 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
1585 strh r1, [r0, #0x01]
1586 str r3, [r0, #0x03]
1587 RET
1588 LMEMCPY_8_PAD
1589
1590 /*
1591 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1592 */
1593 ldrb r2, [r1]
1594 ldrh r3, [r1, #0x01]
1595 ldr ip, [r1, #0x03]
1596 ldrb r1, [r1, #0x07]
1597 strb r2, [r0]
1598 strh r3, [r0, #0x01]
1599 str ip, [r0, #0x03]
1600 strb r1, [r0, #0x07]
1601 RET
1602 LMEMCPY_8_PAD
1603
1604 /*
1605 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1606 */
1607 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1608 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1609 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1610 strb r2, [r0] /* 0 */
1611 mov ip, r1, lsr #8 /* ip = ...7 */
1612 strb ip, [r0, #0x07] /* 7 */
1613 mov ip, r2, lsr #8 /* ip = ...1 */
1614 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1615 mov r3, r3, lsr #8 /* r3 = .543 */
1616 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
1617 strh ip, [r0, #0x01]
1618 str r3, [r0, #0x03]
1619 RET
1620 LMEMCPY_8_PAD
1621
1622 /*
1623 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1624 */
1625 ldrb r3, [r1] /* r3 = ...0 */
1626 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1627 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
1628 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1629 strb r3, [r0]
1630 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
1631 strh ip, [r0, #0x01]
1632 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
1633 str r2, [r0, #0x03]
1634 strb r1, [r0, #0x07]
1635 RET
1636 LMEMCPY_8_PAD
1637
1638 /*
1639 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1640 */
1641 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1642 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1643 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1644 strh r2, [r0]
1645 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
1646 mov r3, r3, lsr #16 /* r3 = ..76 */
1647 str r2, [r0, #0x02]
1648 strh r3, [r0, #0x06]
1649 RET
1650 LMEMCPY_8_PAD
1651
1652 /*
1653 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1654 */
1655 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1656 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1657 ldrb ip, [r1, #0x07] /* ip = ...7 */
1658 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1659 strh r1, [r0]
1660 mov r1, r2, lsr #24 /* r1 = ...2 */
1661 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
1662 mov r3, r3, lsr #24 /* r3 = ...6 */
1663 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
1664 str r1, [r0, #0x02]
1665 strh r3, [r0, #0x06]
1666 RET
1667 LMEMCPY_8_PAD
1668
1669 /*
1670 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1671 */
1672 ldrh r2, [r1]
1673 ldr ip, [r1, #0x02]
1674 ldrh r3, [r1, #0x06]
1675 strh r2, [r0]
1676 str ip, [r0, #0x02]
1677 strh r3, [r0, #0x06]
1678 RET
1679 LMEMCPY_8_PAD
1680
1681 /*
1682 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1683 */
1684 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
1685 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1686 ldrb ip, [r1] /* ip = ...0 */
1687 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
1688 strh r1, [r0, #0x06]
1689 mov r3, r3, lsl #24 /* r3 = 5... */
1690 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
1691 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
1692 str r3, [r0, #0x02]
1693 strh r2, [r0]
1694 RET
1695 LMEMCPY_8_PAD
1696
1697 /*
1698 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1699 */
1700 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1701 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1702 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
1703 strh r1, [r0, #0x05]
1704 strb r2, [r0]
1705 mov r1, r3, lsr #24 /* r1 = ...7 */
1706 strb r1, [r0, #0x07]
1707 mov r2, r2, lsr #8 /* r2 = .321 */
1708 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
1709 str r2, [r0, #0x01]
1710 RET
1711 LMEMCPY_8_PAD
1712
1713 /*
1714 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1715 */
1716 ldrb r3, [r1] /* r3 = ...0 */
1717 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
1718 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1719 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1720 strb r3, [r0]
1721 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
1722 strh r3, [r0, #0x05]
1723 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
1724 str r2, [r0, #0x01]
1725 strb r1, [r0, #0x07]
1726 RET
1727 LMEMCPY_8_PAD
1728
1729 /*
1730 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1731 */
1732 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1733 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1734 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1735 strb r2, [r0]
1736 mov ip, r2, lsr #8 /* ip = ...1 */
1737 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1738 mov r2, r1, lsr #8 /* r2 = ...7 */
1739 strb r2, [r0, #0x07]
1740 mov r1, r1, lsl #8 /* r1 = .76. */
1741 orr r1, r1, r3, lsr #24 /* r1 = .765 */
1742 str ip, [r0, #0x01]
1743 strh r1, [r0, #0x05]
1744 RET
1745 LMEMCPY_8_PAD
1746
1747 /*
1748 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1749 */
1750 ldrb r2, [r1]
1751 ldr ip, [r1, #0x01]
1752 ldrh r3, [r1, #0x05]
1753 ldrb r1, [r1, #0x07]
1754 strb r2, [r0]
1755 str ip, [r0, #0x01]
1756 strh r3, [r0, #0x05]
1757 strb r1, [r0, #0x07]
1758 RET
1759 LMEMCPY_8_PAD
1760
1760
1761 /******************************************************************************
1762 * Special case for 12 byte copies
1763 */
1764 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
1765 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
1766 LMEMCPY_C_PAD
1767 .Lmemcpy_c:
/*
 * Same dispatch scheme as .Lmemcpy_4: r2 = (dst & 3) << 2 | (src & 3),
 * r3 = pc - 0x14 = address of .Lmemcpy_c (pc reads as this insn + 8).
 * The 12-byte cases need more instructions, so each slot here is
 * 128 bytes (LMEMCPY_C_LOG2 = 7) rather than 64.  Case 0000 falls
 * through.  Byte-layout comments use the convention described at
 * .Lmemcpy_4, with hex digits A/B for source bytes 10 and 11.
 */
1768 and r2, r1, #0x03
1769 orr r2, r2, r0, lsl #2
1770 ands r2, r2, #0x0f
1771 sub r3, pc, #0x14
1772 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
1773
1774 /*
1775 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1776 */
1777 ldr r2, [r1]
1778 ldr r3, [r1, #0x04]
1779 ldr r1, [r1, #0x08]
1780 str r2, [r0]
1781 str r3, [r0, #0x04]
1782 str r1, [r0, #0x08]
1783 RET
1784 LMEMCPY_C_PAD
1785
1786 /*
1787 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1788 */
1789 ldrb r2, [r1, #0xb] /* r2 = ...B */
1790 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1791 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1792 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1793 mov r2, r2, lsl #24 /* r2 = B... */
1794 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
1795 str r2, [r0, #0x08]
1796 mov r2, ip, lsl #24 /* r2 = 7... */
1797 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
1798 mov r1, r1, lsr #8 /* r1 = .210 */
1799 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
1800 str r2, [r0, #0x04]
1801 str r1, [r0]
1802 RET
1803 LMEMCPY_C_PAD
1804
1805 /*
1806 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1807 */
1808 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1809 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1810 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1811 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1812 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1813 str r2, [r0]
1814 mov r3, r3, lsr #16 /* r3 = ..54 */
1815 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
1816 mov r1, r1, lsl #16 /* r1 = BA.. */
1817 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
1818 str r3, [r0, #0x04]
1819 str r1, [r0, #0x08]
1820 RET
1821 LMEMCPY_C_PAD
1822
1823 /*
1824 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1825 */
1826 ldrb r2, [r1] /* r2 = ...0 */
1827 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1828 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1829 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1830 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1831 str r2, [r0]
1832 mov r3, r3, lsr #24 /* r3 = ...4 */
1833 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
1834 mov r1, r1, lsl #8 /* r1 = BA9. */
1835 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
1836 str r3, [r0, #0x04]
1837 str r1, [r0, #0x08]
1838 RET
1839 LMEMCPY_C_PAD
1840
1841 /*
1842 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1843 */
1844 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1845 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1846 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
1847 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1848 strh r1, [r0, #0x01]
1849 strb r2, [r0]
1850 mov r1, r2, lsr #24 /* r1 = ...3 */
1851 orr r2, r1, r3, lsl #8 /* r2 = 6543 */
1852 mov r1, r3, lsr #24 /* r1 = ...7 */
1853 orr r1, r1, ip, lsl #8 /* r1 = A987 */
1854 mov ip, ip, lsr #24 /* ip = ...B */
1855 str r2, [r0, #0x03]
1856 str r1, [r0, #0x07]
1857 strb ip, [r0, #0x0b]
1858 RET
1859 LMEMCPY_C_PAD
1860
1861 /*
1862 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1863 */
1864 ldrb r2, [r1]
1865 ldrh r3, [r1, #0x01]
1866 ldr ip, [r1, #0x03]
1867 strb r2, [r0]
1868 ldr r2, [r1, #0x07]
1869 ldrb r1, [r1, #0x0b]
1870 strh r3, [r0, #0x01]
1871 str ip, [r0, #0x03]
1872 str r2, [r0, #0x07]
1873 strb r1, [r0, #0x0b]
1874 RET
1875 LMEMCPY_C_PAD
1876
1877 /*
1878 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1879 */
1880 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1881 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1882 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1883 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1884 strb r2, [r0]
1885 mov r2, r2, lsr #8 /* r2 = ...1 */
1886 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1887 strh r2, [r0, #0x01]
1888 mov r2, r3, lsr #8 /* r2 = .543 */
1889 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
1890 mov r2, ip, lsr #8 /* r2 = .987 */
1891 orr r2, r2, r1, lsl #24 /* r2 = A987 */
1892 mov r1, r1, lsr #8 /* r1 = ...B */
1893 str r3, [r0, #0x03]
1894 str r2, [r0, #0x07]
1895 strb r1, [r0, #0x0b]
1896 RET
1897 LMEMCPY_C_PAD
1898
1899 /*
1900 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1901 */
1902 ldrb r2, [r1]
1903 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1904 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1905 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1906 strb r2, [r0]
1907 strh r3, [r0, #0x01]
1908 mov r3, r3, lsr #16 /* r3 = ..43 */
1909 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
1910 mov ip, ip, lsr #16 /* ip = ..87 */
1911 orr ip, ip, r1, lsl #16 /* ip = A987 */
1912 mov r1, r1, lsr #16 /* r1 = ..xB */
1913 str r3, [r0, #0x03]
1914 str ip, [r0, #0x07]
1915 strb r1, [r0, #0x0b]
1916 RET
1917 LMEMCPY_C_PAD
1918
1919 /*
1920 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1921 */
1922 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
1923 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1924 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
1925 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1926 strh ip, [r0]
1927 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
1928 mov r3, r3, lsr #16 /* r3 = ..76 */
1929 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
1930 mov r2, r2, lsr #16 /* r2 = ..BA */
1931 str r1, [r0, #0x02]
1932 str r3, [r0, #0x06]
1933 strh r2, [r0, #0x0a]
1934 RET
1935 LMEMCPY_C_PAD
1936
1937 /*
1938 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1939 */
1940 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1941 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1942 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
1943 strh ip, [r0]
1944 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1945 ldrb r1, [r1, #0x0b] /* r1 = ...B */
1946 mov r2, r2, lsr #24 /* r2 = ...2 */
1947 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
1948 mov r3, r3, lsr #24 /* r3 = ...6 */
1949 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
1950 mov r1, r1, lsl #8 /* r1 = ..B. */
1951 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
1952 str r2, [r0, #0x02]
1953 str r3, [r0, #0x06]
1954 strh r1, [r0, #0x0a]
1955 RET
1956 LMEMCPY_C_PAD
1957
1958 /*
1959 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1960 */
1961 ldrh r2, [r1]
1962 ldr r3, [r1, #0x02]
1963 ldr ip, [r1, #0x06]
1964 ldrh r1, [r1, #0x0a]
1965 strh r2, [r0]
1966 str r3, [r0, #0x02]
1967 str ip, [r0, #0x06]
1968 strh r1, [r0, #0x0a]
1969 RET
1970 LMEMCPY_C_PAD
1971
1972 /*
1973 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
1974 */
1975 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
1976 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
1977 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
1978 strh ip, [r0, #0x0a]
1979 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1980 ldrb r1, [r1] /* r1 = ...0 */
1981 mov r2, r2, lsl #24 /* r2 = 9... */
1982 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
1983 mov r3, r3, lsl #24 /* r3 = 5... */
1984 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
1985 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
1986 str r2, [r0, #0x06]
1987 str r3, [r0, #0x02]
1988 strh r1, [r0]
1989 RET
1990 LMEMCPY_C_PAD
1991
1992 /*
1993 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1994 */
1995 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1996 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
1997 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
1998 strb r2, [r0]
1999 mov r3, r2, lsr #8 /* r3 = .321 */
2000 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2001 str r3, [r0, #0x01]
2002 mov r3, ip, lsr #8 /* r3 = .765 */
2003 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2004 str r3, [r0, #0x05]
2005 mov r1, r1, lsr #8 /* r1 = .BA9 */
2006 strh r1, [r0, #0x09]
2007 mov r1, r1, lsr #16 /* r1 = ...B */
2008 strb r1, [r0, #0x0b]
2009 RET
2010 LMEMCPY_C_PAD
2011
2012 /*
2013 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2014 */
2015 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2016 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2017 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2018 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2019 strb r2, [r0, #0x0b]
2020 mov r2, r3, lsr #16 /* r2 = ..A9 */
2021 strh r2, [r0, #0x09]
2022 mov r3, r3, lsl #16 /* r3 = 87.. */
2023 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2024 mov ip, ip, lsl #16 /* ip = 43.. */
2025 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2026 mov r1, r1, lsr #8 /* r1 = .210 */
2027 str r3, [r0, #0x05]
2028 str ip, [r0, #0x01]
2029 strb r1, [r0]
2030 RET
2031 LMEMCPY_C_PAD
2032
2033 /*
2034 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2035 */
2036 ldrh r2, [r1] /* r2 = ..10 */
2037 ldr r3, [r1, #0x02] /* r3 = 5432 */
2038 ldr ip, [r1, #0x06] /* ip = 9876 */
2039 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2040 strb r2, [r0]
2041 mov r2, r2, lsr #8 /* r2 = ...1 */
2042 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2043 mov r3, r3, lsr #24 /* r3 = ...5 */
2044 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2045 mov ip, ip, lsr #24 /* ip = ...9 */
2046 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2047 mov r1, r1, lsr #8 /* r1 = ...B */
2048 str r2, [r0, #0x01]
2049 str r3, [r0, #0x05]
2050 strh ip, [r0, #0x09]
2051 strb r1, [r0, #0x0b]
2052 RET
2053 LMEMCPY_C_PAD
2054
2055 /*
2056 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2057 */
2058 ldrb r2, [r1]
2059 ldr r3, [r1, #0x01]
2060 ldr ip, [r1, #0x05]
2061 strb r2, [r0]
2062 ldrh r2, [r1, #0x09]
2063 ldrb r1, [r1, #0x0b]
2064 str r3, [r0, #0x01]
2065 str ip, [r0, #0x05]
2066 strh r2, [r0, #0x09]
2067 strb r1, [r0, #0x0b]
2068 RET
2069 END(memcpy)
Cache object: 9da32b34e4ea16013bec43389f94bb24
|