FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 * 3. All advertising materials mentioning features or use of this software
76 * must display the following acknowledgement:
77 * This product includes software developed by the NetBSD
78 * Foundation, Inc. and its contributors.
79 * 4. Neither the name of The NetBSD Foundation nor the names of its
80 * contributors may be used to endorse or promote products derived
81 * from this software without specific prior written permission.
82 *
83 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
84 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
85 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
86 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
87 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
88 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
89 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
90 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
91 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
92 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
93 * POSSIBILITY OF SUCH DAMAGE.
94 */
95
96 #include <machine/asm.h>
97 #include <machine/asmacros.h>
98 __FBSDID("$FreeBSD: releng/8.1/sys/arm/arm/support.S 175255 2008-01-12 21:11:43Z cognet $");
99
100 #include "assym.s"
101
/*
 * Literal pool: pointers to optional platform block-op hooks.
 * _arm_memcpy/_arm_bzero hold the address of a platform-supplied
 * accelerated routine, or 0 when none is installed (the callers below
 * test the loaded word against 0 before using it).  _min_memcpy_size
 * and _min_bzero_size hold the minimum length for which the hook is
 * used; shorter requests take the inline code paths.
 */
102 .L_arm_memcpy:
103 .word _C_LABEL(_arm_memcpy)
104 .L_arm_bzero:
105 .word _C_LABEL(_arm_bzero)
106 .L_min_memcpy_size:
107 .word _C_LABEL(_min_memcpy_size)
108 .L_min_bzero_size:
109 .word _C_LABEL(_min_bzero_size)
110 /*
111 * memset: Sets a block of memory to the specified value
112 *
113 * On entry:
114 * r0 - dest address
115 * r1 - byte to write
116 * r2 - number of bytes to write
117 *
118 * On exit:
119 * r0 - dest address
120 */
121 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * void bzero(void *dst, size_t len)
 *   r0 = dst, r1 = len
 *
 * If a platform hook (_arm_bzero) is installed and len >= _min_bzero_size,
 * call it first; a zero return from the hook means it handled the request.
 * Otherwise (no hook, short length, or hook returned non-zero) fall through
 * to the common fill code shared with memset, with r3 = 0 as the fill byte.
 * Note do_memset takes the count in r1, which is where bzero's len already
 * lives, so no shuffling is needed on this path.
 */
122 ENTRY(bzero)
123 ldr r3, .L_arm_bzero
124 ldr r3, [r3] /* hook installed? (0 = no) */
125 cmp r3, #0
126 beq .Lnormal0
127 ldr r2, .L_min_bzero_size
128 ldr r2, [r2]
129 cmp r1, r2 /* len below hook threshold? */
130 blt .Lnormal0
131 stmfd sp!, {r0, r1, lr}
132 mov r2, #0
133 mov lr, pc /* pre-BLX indirect call idiom: */
134 mov pc, r3 /* lr = return addr, jump to hook */
135 cmp r0, #0 /* hook returned 0 => success */
136 ldmfd sp!, {r0, r1, lr} /* restore args; flags unaffected */
137 RETeq /* done if hook succeeded */
138 .Lnormal0:
139 mov r3, #0x00 /* fill value for do_memset */
140 b do_memset
141
142 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * void *memset(void *dst, int c, size_t len)
 *   r0 = dst, r1 = c, r2 = len; returns r0 (dst) unchanged.
 *
 * Register convention from do_memset onward (bzero branches in here too):
 *   ip = write cursor, r1 = bytes remaining, r3 = fill byte (replicated
 *   to 16 then 32 bits once word-aligned), r2 = second fill word for the
 *   multi-word store loops.  r0 is never modified, so it is still the
 *   return value at every RET.
 * On _ARM_ARCH_5E the buffer is additionally 8-byte aligned so the
 * 64-bit strd ("strged" = strd + ge condition, pre-UAL spelling) can
 * be used; otherwise stm pairs are used.
 */
143 ENTRY(memset)
144 and r3, r1, #0xff /* We deal with bytes */
145 mov r1, r2
146 do_memset:
147 cmp r1, #0x04 /* Do we have less than 4 bytes */
148 mov ip, r0
149 blt .Lmemset_lessthanfour
150
151 /* Ok first we will word align the address */
152 ands r2, ip, #0x03 /* Get the bottom two bits */
153 bne .Lmemset_wordunaligned /* The address is not word aligned */
154
155 /* We are now word aligned */
156 .Lmemset_wordaligned:
157 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
158 #ifdef _ARM_ARCH_5E
159 tst ip, #0x04 /* Quad-align for armv5e */
160 #else
161 cmp r1, #0x10
162 #endif
163 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
164 #ifdef _ARM_ARCH_5E
165 subne r1, r1, #0x04 /* Quad-align if necessary */
166 strne r3, [ip], #0x04
167 cmp r1, #0x10
168 #endif
169 blt .Lmemset_loop4 /* If less than 16 then use words */
170 mov r2, r3 /* Duplicate data */
171 cmp r1, #0x80 /* If < 128 then skip the big loop */
172 blt .Lmemset_loop32
173
174 /* Do 128 bytes at a time */
175 .Lmemset_loop128:
176 subs r1, r1, #0x80
177 #ifdef _ARM_ARCH_5E
178 strged r2, [ip], #0x08
179 strged r2, [ip], #0x08
180 strged r2, [ip], #0x08
181 strged r2, [ip], #0x08
182 strged r2, [ip], #0x08
183 strged r2, [ip], #0x08
184 strged r2, [ip], #0x08
185 strged r2, [ip], #0x08
186 strged r2, [ip], #0x08
187 strged r2, [ip], #0x08
188 strged r2, [ip], #0x08
189 strged r2, [ip], #0x08
190 strged r2, [ip], #0x08
191 strged r2, [ip], #0x08
192 strged r2, [ip], #0x08
193 strged r2, [ip], #0x08
194 #else
195 stmgeia ip!, {r2-r3}
196 stmgeia ip!, {r2-r3}
197 stmgeia ip!, {r2-r3}
198 stmgeia ip!, {r2-r3}
199 stmgeia ip!, {r2-r3}
200 stmgeia ip!, {r2-r3}
201 stmgeia ip!, {r2-r3}
202 stmgeia ip!, {r2-r3}
203 stmgeia ip!, {r2-r3}
204 stmgeia ip!, {r2-r3}
205 stmgeia ip!, {r2-r3}
206 stmgeia ip!, {r2-r3}
207 stmgeia ip!, {r2-r3}
208 stmgeia ip!, {r2-r3}
209 stmgeia ip!, {r2-r3}
210 stmgeia ip!, {r2-r3}
211 #endif
212 bgt .Lmemset_loop128
213 RETeq /* Zero length so just exit */
214
215 add r1, r1, #0x80 /* Adjust for extra sub */
216
217 /* Do 32 bytes at a time */
218 .Lmemset_loop32:
219 subs r1, r1, #0x20
220 #ifdef _ARM_ARCH_5E
221 strged r2, [ip], #0x08
222 strged r2, [ip], #0x08
223 strged r2, [ip], #0x08
224 strged r2, [ip], #0x08
225 #else
226 stmgeia ip!, {r2-r3}
227 stmgeia ip!, {r2-r3}
228 stmgeia ip!, {r2-r3}
229 stmgeia ip!, {r2-r3}
230 #endif
231 bgt .Lmemset_loop32
232 RETeq /* Zero length so just exit */
233
234 adds r1, r1, #0x10 /* Partially adjust for extra sub */
235
236 /* Deal with 16 bytes or more */
237 #ifdef _ARM_ARCH_5E
238 strged r2, [ip], #0x08
239 strged r2, [ip], #0x08
240 #else
241 stmgeia ip!, {r2-r3}
242 stmgeia ip!, {r2-r3}
243 #endif
244 RETeq /* Zero length so just exit */
245
246 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
247
248 /* We have at least 4 bytes so copy as words */
249 .Lmemset_loop4:
250 subs r1, r1, #0x04
251 strge r3, [ip], #0x04
252 bgt .Lmemset_loop4
253 RETeq /* Zero length so just exit */
254
255 #ifdef _ARM_ARCH_5E
256 /* Compensate for 64-bit alignment check */
257 adds r1, r1, #0x04
258 RETeq
259 cmp r1, #2
260 #else
261 cmp r1, #-2
262 #endif
263
264 /* 1-3 trailing bytes; flags from the cmp above select how many */
265 strb r3, [ip], #0x01 /* Set 1 byte */
266 strgeb r3, [ip], #0x01 /* Set another byte */
267 strgtb r3, [ip] /* and a third */
268 RET /* Exit */
269
270 /* Byte-fill up to the next word boundary, then re-enter aligned path */
271 .Lmemset_wordunaligned:
272 rsb r2, r2, #0x004 /* r2 = bytes to word boundary (1-3) */
273 strb r3, [ip], #0x01 /* Set 1 byte */
274 cmp r2, #0x02
275 strgeb r3, [ip], #0x01 /* Set another byte */
276 sub r1, r1, r2
277 strgtb r3, [ip], #0x01 /* and a third */
278 cmp r1, #0x04 /* More than 4 bytes left? */
279 bge .Lmemset_wordaligned /* Yup */
280
281 .Lmemset_lessthanfour:
282 cmp r1, #0x00
283 RETeq /* Zero length so exit */
284 strb r3, [ip], #0x01 /* Set 1 byte */
285 cmp r1, #0x02
286 strgeb r3, [ip], #0x01 /* Set another byte */
287 strgtb r3, [ip] /* and a third */
288 RET /* Exit */
287
/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 *   r0 = b1, r1 = b2, r2 = len
 * Returns 0 if the buffers are equal; otherwise the (non-zero) byte
 * difference at the first mismatch, memcmp-style.
 *
 * len == 6 is special-cased (common for Ethernet addresses per the
 * comment below).  Otherwise: if the two pointers have different
 * word alignment, compare byte-by-byte; if they are mutually aligned,
 * byte-compare up to the next word boundary, then compare a word at
 * a time, redoing the final mismatching word bytewise to produce the
 * correct byte-difference return value.
 * Register use: ip = b1 cursor (r0 is reused as scratch/return).
 */
288 ENTRY(bcmp)
289 mov ip, r0
290 cmp r2, #0x06
291 beq .Lmemcmp_6bytes
292 mov r0, #0x00
293
294 /* Are both addresses aligned the same way? */
295 cmp r2, #0x00
296 eornes r3, ip, r1 /* r3 = pointer bits that differ */
297 RETeq /* len == 0, or same addresses! */
298 tst r3, #0x03
299 subne r2, r2, #0x01
300 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
301
302 /* Word-align the addresses, if necessary */
303 /*
304 * Computed jump: r3 = 3 * ((r1 - 5) & 3), then pc += r3 << 3, i.e.
305 * skip 6 instructions (one compare block below) per step; pc reads
306 * as current + 8, which the nop accounts for. An already-aligned
307 * source skips all three byte-compare blocks; otherwise we enter at
308 * the block that compares exactly the 3/2/1 bytes needed to align.
309 */
310 sub r3, r1, #0x05
311 ands r3, r3, #0x03
312 add r3, r3, r3, lsl #1
313 addne pc, pc, r3, lsl #3
314 nop
315
316 /* Compare up to 3 bytes */
317 ldrb r0, [ip], #0x01
318 ldrb r3, [r1], #0x01
319 subs r0, r0, r3
320 RETne
321 subs r2, r2, #0x01
322 RETeq
323
324 /* Compare up to 2 bytes */
325 ldrb r0, [ip], #0x01
326 ldrb r3, [r1], #0x01
327 subs r0, r0, r3
328 RETne
329 subs r2, r2, #0x01
330 RETeq
331
332 /* Compare 1 byte */
333 ldrb r0, [ip], #0x01
334 ldrb r3, [r1], #0x01
335 subs r0, r0, r3
336 RETne
337 subs r2, r2, #0x01
338 RETeq
339
340 /* Compare 4 bytes at a time, if possible */
341 subs r2, r2, #0x04
342 bcc .Lmemcmp_bytewise
343 .Lmemcmp_word_aligned:
344 ldr r0, [ip], #0x04
345 ldr r3, [r1], #0x04
346 subs r2, r2, #0x04
347 cmpcs r0, r3
348 beq .Lmemcmp_word_aligned
349 sub r0, r0, r3
350
351 /* Correct for extra subtraction, and check if done */
352 adds r2, r2, #0x04
353 cmpeq r0, #0x00 /* If done, did all bytes match? */
354 RETeq /* Yup. Just return */
355
356 /* Re-do the final word byte-wise */
357 sub ip, ip, #0x04
358 sub r1, r1, #0x04
359
360 .Lmemcmp_bytewise:
361 add r2, r2, #0x03
362 .Lmemcmp_bytewise2:
363 ldrb r0, [ip], #0x01
364 ldrb r3, [r1], #0x01
365 subs r2, r2, #0x01
366 cmpcs r0, r3
367 beq .Lmemcmp_bytewise2
368 sub r0, r0, r3
369 RET
370
371 /*
372 * 6 byte compares are very common, thanks to the network stack.
373 * This code is hand-scheduled to reduce the number of stalls for
374 * load results. Everything else being equal, this will be ~32%
375 * faster than a byte-wise memcmp.
376 */
377 .align 5
378 .Lmemcmp_6bytes:
379 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
380 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
381 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
382 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
383 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */
384 RETne /* Return if mismatch on #0 */
385 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
386 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */
387 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */
388 RETne /* Return if mismatch on #1 */
389 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
390 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
391 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */
392 RETne /* Return if mismatch on #2 */
393 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
394 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */
395 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */
396 RETne /* Return if mismatch on #3 */
397 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
398 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
399 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */
400 RETne /* Return if mismatch on #4 */
401 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
402 RET
396
/*
 * void bcopy(const void *src, void *dst, size_t len)
 * bcopy takes (src, dst) while memmove takes (dst, src): swap r0 and r1
 * with the three-XOR trick (no scratch register needed) and fall
 * straight through into memmove below.
 */
397 ENTRY(bcopy)
398 /* switch the source and destination registers */
399 eor r0, r1, r0
400 eor r1, r0, r1
401 eor r0, r1, r0
/*
 * void *memmove(void *dst, const void *src, size_t len)
 *   r0 = dst, r1 = src, r2 = len; returns dst (saved/restored on the
 *   stack around the forward path; the backward path never writes r0's
 *   original value).
 *
 * Overlap handling: if |dst - src| >= len the buffers don't overlap and
 * the copy is delegated to memcpy.  Otherwise copy forward when
 * src > dst, or backward (from the ends, via .Lmemmove_backwards) when
 * dst > src, so the overlapping region is never clobbered before it is
 * read.
 *
 * Structure of each direction (forward shown; backward mirrors it with
 * ldmdb/stmdb and pre-decrement addressing):
 *  - byte-copy to word-align the destination (_fdestul/_bdestul);
 *  - if the source is then also word-aligned, bulk copy 32 bytes per
 *    iteration, then 16/12/8/4, then trailing bytes;
 *  - if the source is misaligned by 1/2/3 bytes (_fsrcul1/2/3 and
 *    _bsrcul1/2/3), read whole words from the rounded-down source and
 *    reassemble output words with shift/OR pairs, with the shift
 *    directions swapped under __ARMEB__ (big-endian).
 * r4/r5 (and lr on the backward path) are borrowed via the stack for
 * the wide loops.
 */
402 ENTRY(memmove)
403 /* Do the buffers overlap? */
404 cmp r0, r1
405 RETeq /* Bail now if src/dst are the same */
406 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
407 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
408 cmp r3, r2 /* if (r3 < len) we have an overlap */
409 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
410
411 /* Determine copy direction */
412 cmp r1, r0
413 bcc .Lmemmove_backwards
414
415 moveq r0, #0 /* Quick abort for len=0 */
416 RETeq
417
418 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
419 subs r2, r2, #4
420 blt .Lmemmove_fl4 /* less than 4 bytes */
421 ands r12, r0, #3
422 bne .Lmemmove_fdestul /* oh unaligned destination addr */
423 ands r12, r1, #3
424 bne .Lmemmove_fsrcul /* oh unaligned source addr */
425
426 .Lmemmove_ft8:
427 /* We have aligned source and destination */
428 subs r2, r2, #8
429 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
430 subs r2, r2, #0x14
431 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
432 stmdb sp!, {r4} /* borrow r4 */
433
434 /* blat 32 bytes at a time */
435 /* XXX for really big copies perhaps we should use more registers */
436 .Lmemmove_floop32:
437 ldmia r1!, {r3, r4, r12, lr}
438 stmia r0!, {r3, r4, r12, lr}
439 ldmia r1!, {r3, r4, r12, lr}
440 stmia r0!, {r3, r4, r12, lr}
441 subs r2, r2, #0x20
442 bge .Lmemmove_floop32
443
444 cmn r2, #0x10
445 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
446 stmgeia r0!, {r3, r4, r12, lr}
447 subge r2, r2, #0x10
448 ldmia sp!, {r4} /* return r4 */
449
450 .Lmemmove_fl32:
451 adds r2, r2, #0x14
452
453 /* blat 12 bytes at a time */
454 .Lmemmove_floop12:
455 ldmgeia r1!, {r3, r12, lr}
456 stmgeia r0!, {r3, r12, lr}
457 subges r2, r2, #0x0c
458 bge .Lmemmove_floop12
459
460 .Lmemmove_fl12:
461 adds r2, r2, #8
462 blt .Lmemmove_fl4
463
464 subs r2, r2, #4
465 ldrlt r3, [r1], #4
466 strlt r3, [r0], #4
467 ldmgeia r1!, {r3, r12}
468 stmgeia r0!, {r3, r12}
469 subge r2, r2, #4
470
471 .Lmemmove_fl4:
472 /* less than 4 bytes to go */
473 adds r2, r2, #4
474 ldmeqia sp!, {r0, pc} /* done */
475
476 /* copy the crud byte at a time */
477 cmp r2, #2
478 ldrb r3, [r1], #1
479 strb r3, [r0], #1
480 ldrgeb r3, [r1], #1
481 strgeb r3, [r0], #1
482 ldrgtb r3, [r1], #1
483 strgtb r3, [r0], #1
484 ldmia sp!, {r0, pc}
485
486 /* erg - unaligned destination */
487 .Lmemmove_fdestul:
488 rsb r12, r12, #4 /* r12 = bytes to word boundary (1-3) */
489 cmp r12, #2
490
491 /* align destination with byte copies */
492 ldrb r3, [r1], #1
493 strb r3, [r0], #1
494 ldrgeb r3, [r1], #1
495 strgeb r3, [r0], #1
496 ldrgtb r3, [r1], #1
497 strgtb r3, [r0], #1
498 subs r2, r2, r12
499 blt .Lmemmove_fl4 /* less the 4 bytes */
500
501 ands r12, r1, #3
502 beq .Lmemmove_ft8 /* we have an aligned source */
503
504 /* erg - unaligned source */
505 /* This is where it gets nasty ... */
506 .Lmemmove_fsrcul:
507 bic r1, r1, #3 /* round src down to a word boundary */
508 ldr lr, [r1], #4 /* prime lr with the first partial word */
509 cmp r12, #2 /* dispatch on misalignment 1/2/3 */
510 bgt .Lmemmove_fsrcul3
511 beq .Lmemmove_fsrcul2
512 cmp r2, #0x0c
513 blt .Lmemmove_fsrcul1loop4
514 sub r2, r2, #0x0c
515 stmdb sp!, {r4, r5}
516
517 .Lmemmove_fsrcul1loop16:
518 #ifdef __ARMEB__
519 mov r3, lr, lsl #8
520 #else
521 mov r3, lr, lsr #8
522 #endif
523 ldmia r1!, {r4, r5, r12, lr}
524 #ifdef __ARMEB__
525 orr r3, r3, r4, lsr #24
526 mov r4, r4, lsl #8
527 orr r4, r4, r5, lsr #24
528 mov r5, r5, lsl #8
529 orr r5, r5, r12, lsr #24
530 mov r12, r12, lsl #8
531 orr r12, r12, lr, lsr #24
532 #else
533 orr r3, r3, r4, lsl #24
534 mov r4, r4, lsr #8
535 orr r4, r4, r5, lsl #24
536 mov r5, r5, lsr #8
537 orr r5, r5, r12, lsl #24
538 mov r12, r12, lsr #8
539 orr r12, r12, lr, lsl #24
540 #endif
541 stmia r0!, {r3-r5, r12}
542 subs r2, r2, #0x10
543 bge .Lmemmove_fsrcul1loop16
544 ldmia sp!, {r4, r5}
545 adds r2, r2, #0x0c
546 blt .Lmemmove_fsrcul1l4
547
548 .Lmemmove_fsrcul1loop4:
549 #ifdef __ARMEB__
550 mov r12, lr, lsl #8
551 #else
552 mov r12, lr, lsr #8
553 #endif
554 ldr lr, [r1], #4
555 #ifdef __ARMEB__
556 orr r12, r12, lr, lsr #24
557 #else
558 orr r12, r12, lr, lsl #24
559 #endif
560 str r12, [r0], #4
561 subs r2, r2, #4
562 bge .Lmemmove_fsrcul1loop4
563
564 .Lmemmove_fsrcul1l4:
565 sub r1, r1, #3 /* undo rounding: back to true byte position */
566 b .Lmemmove_fl4
567
568 .Lmemmove_fsrcul2:
569 cmp r2, #0x0c
570 blt .Lmemmove_fsrcul2loop4
571 sub r2, r2, #0x0c
572 stmdb sp!, {r4, r5}
573
574 .Lmemmove_fsrcul2loop16:
575 #ifdef __ARMEB__
576 mov r3, lr, lsl #16
577 #else
578 mov r3, lr, lsr #16
579 #endif
580 ldmia r1!, {r4, r5, r12, lr}
581 #ifdef __ARMEB__
582 orr r3, r3, r4, lsr #16
583 mov r4, r4, lsl #16
584 orr r4, r4, r5, lsr #16
585 mov r5, r5, lsl #16
586 orr r5, r5, r12, lsr #16
587 mov r12, r12, lsl #16
588 orr r12, r12, lr, lsr #16
589 #else
590 orr r3, r3, r4, lsl #16
591 mov r4, r4, lsr #16
592 orr r4, r4, r5, lsl #16
593 mov r5, r5, lsr #16
594 orr r5, r5, r12, lsl #16
595 mov r12, r12, lsr #16
596 orr r12, r12, lr, lsl #16
597 #endif
598 stmia r0!, {r3-r5, r12}
599 subs r2, r2, #0x10
600 bge .Lmemmove_fsrcul2loop16
601 ldmia sp!, {r4, r5}
602 adds r2, r2, #0x0c
603 blt .Lmemmove_fsrcul2l4
604
605 .Lmemmove_fsrcul2loop4:
606 #ifdef __ARMEB__
607 mov r12, lr, lsl #16
608 #else
609 mov r12, lr, lsr #16
610 #endif
611 ldr lr, [r1], #4
612 #ifdef __ARMEB__
613 orr r12, r12, lr, lsr #16
614 #else
615 orr r12, r12, lr, lsl #16
616 #endif
617 str r12, [r0], #4
618 subs r2, r2, #4
619 bge .Lmemmove_fsrcul2loop4
620
621 .Lmemmove_fsrcul2l4:
622 sub r1, r1, #2 /* undo rounding: back to true byte position */
623 b .Lmemmove_fl4
624
625 .Lmemmove_fsrcul3:
626 cmp r2, #0x0c
627 blt .Lmemmove_fsrcul3loop4
628 sub r2, r2, #0x0c
629 stmdb sp!, {r4, r5}
630
631 .Lmemmove_fsrcul3loop16:
632 #ifdef __ARMEB__
633 mov r3, lr, lsl #24
634 #else
635 mov r3, lr, lsr #24
636 #endif
637 ldmia r1!, {r4, r5, r12, lr}
638 #ifdef __ARMEB__
639 orr r3, r3, r4, lsr #8
640 mov r4, r4, lsl #24
641 orr r4, r4, r5, lsr #8
642 mov r5, r5, lsl #24
643 orr r5, r5, r12, lsr #8
644 mov r12, r12, lsl #24
645 orr r12, r12, lr, lsr #8
646 #else
647 orr r3, r3, r4, lsl #8
648 mov r4, r4, lsr #24
649 orr r4, r4, r5, lsl #8
650 mov r5, r5, lsr #24
651 orr r5, r5, r12, lsl #8
652 mov r12, r12, lsr #24
653 orr r12, r12, lr, lsl #8
654 #endif
655 stmia r0!, {r3-r5, r12}
656 subs r2, r2, #0x10
657 bge .Lmemmove_fsrcul3loop16
658 ldmia sp!, {r4, r5}
659 adds r2, r2, #0x0c
660 blt .Lmemmove_fsrcul3l4
661
662 .Lmemmove_fsrcul3loop4:
663 #ifdef __ARMEB__
664 mov r12, lr, lsl #24
665 #else
666 mov r12, lr, lsr #24
667 #endif
668 ldr lr, [r1], #4
669 #ifdef __ARMEB__
670 orr r12, r12, lr, lsr #8
671 #else
672 orr r12, r12, lr, lsl #8
673 #endif
674 str r12, [r0], #4
675 subs r2, r2, #4
676 bge .Lmemmove_fsrcul3loop4
677
678 .Lmemmove_fsrcul3l4:
679 sub r1, r1, #1 /* undo rounding: back to true byte position */
680 b .Lmemmove_fl4
681
682 /* Overlapping with dst > src: copy from the ends, moving down */
683 .Lmemmove_backwards:
684 add r1, r1, r2 /* point r1/r0 one past the buffers */
685 add r0, r0, r2
686 subs r2, r2, #4
687 blt .Lmemmove_bl4 /* less than 4 bytes */
688 ands r12, r0, #3
689 bne .Lmemmove_bdestul /* oh unaligned destination addr */
690 ands r12, r1, #3
691 bne .Lmemmove_bsrcul /* oh unaligned source addr */
692
693 .Lmemmove_bt8:
694 /* We have aligned source and destination */
695 subs r2, r2, #8
696 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
697 stmdb sp!, {r4, lr}
698 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
699 blt .Lmemmove_bl32
700
701 /* blat 32 bytes at a time */
702 /* XXX for really big copies perhaps we should use more registers */
703 .Lmemmove_bloop32:
704 ldmdb r1!, {r3, r4, r12, lr}
705 stmdb r0!, {r3, r4, r12, lr}
706 ldmdb r1!, {r3, r4, r12, lr}
707 stmdb r0!, {r3, r4, r12, lr}
708 subs r2, r2, #0x20
709 bge .Lmemmove_bloop32
710
711 .Lmemmove_bl32:
712 cmn r2, #0x10
713 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
714 stmgedb r0!, {r3, r4, r12, lr}
715 subge r2, r2, #0x10
716 adds r2, r2, #0x14
717 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
718 stmgedb r0!, {r3, r12, lr}
719 subge r2, r2, #0x0c
720 ldmia sp!, {r4, lr}
721
722 .Lmemmove_bl12:
723 adds r2, r2, #8
724 blt .Lmemmove_bl4
725 subs r2, r2, #4
726 ldrlt r3, [r1, #-4]!
727 strlt r3, [r0, #-4]!
728 ldmgedb r1!, {r3, r12}
729 stmgedb r0!, {r3, r12}
730 subge r2, r2, #4
731
732 .Lmemmove_bl4:
733 /* less than 4 bytes to go */
734 adds r2, r2, #4
735 RETeq /* done */
736
737 /* copy the crud byte at a time */
738 cmp r2, #2
739 ldrb r3, [r1, #-1]!
740 strb r3, [r0, #-1]!
741 ldrgeb r3, [r1, #-1]!
742 strgeb r3, [r0, #-1]!
743 ldrgtb r3, [r1, #-1]!
744 strgtb r3, [r0, #-1]!
745 RET
746
747 /* erg - unaligned destination */
748 .Lmemmove_bdestul:
749 cmp r12, #2
750
751 /* align destination with byte copies */
752 ldrb r3, [r1, #-1]!
753 strb r3, [r0, #-1]!
754 ldrgeb r3, [r1, #-1]!
755 strgeb r3, [r0, #-1]!
756 ldrgtb r3, [r1, #-1]!
757 strgtb r3, [r0, #-1]!
758 subs r2, r2, r12
759 blt .Lmemmove_bl4 /* less than 4 bytes to go */
760 ands r12, r1, #3
761 beq .Lmemmove_bt8 /* we have an aligned source */
762
763 /* erg - unaligned source */
764 /* This is where it gets nasty ... */
765 .Lmemmove_bsrcul:
766 bic r1, r1, #3 /* round src down to a word boundary */
767 ldr r3, [r1, #0] /* prime r3 with the first partial word */
768 cmp r12, #2 /* dispatch on misalignment 1/2/3 */
769 blt .Lmemmove_bsrcul1
770 beq .Lmemmove_bsrcul2
771 cmp r2, #0x0c
772 blt .Lmemmove_bsrcul3loop4
773 sub r2, r2, #0x0c
774 stmdb sp!, {r4, r5, lr}
775
776 .Lmemmove_bsrcul3loop16:
777 #ifdef __ARMEB__
778 mov lr, r3, lsr #8
779 #else
780 mov lr, r3, lsl #8
781 #endif
782 ldmdb r1!, {r3-r5, r12}
783 #ifdef __ARMEB__
784 orr lr, lr, r12, lsl #24
785 mov r12, r12, lsr #8
786 orr r12, r12, r5, lsl #24
787 mov r5, r5, lsr #8
788 orr r5, r5, r4, lsl #24
789 mov r4, r4, lsr #8
790 orr r4, r4, r3, lsl #24
791 #else
792 orr lr, lr, r12, lsr #24
793 mov r12, r12, lsl #8
794 orr r12, r12, r5, lsr #24
795 mov r5, r5, lsl #8
796 orr r5, r5, r4, lsr #24
797 mov r4, r4, lsl #8
798 orr r4, r4, r3, lsr #24
799 #endif
800 stmdb r0!, {r4, r5, r12, lr}
801 subs r2, r2, #0x10
802 bge .Lmemmove_bsrcul3loop16
803 ldmia sp!, {r4, r5, lr}
804 adds r2, r2, #0x0c
805 blt .Lmemmove_bsrcul3l4
806
807 .Lmemmove_bsrcul3loop4:
808 #ifdef __ARMEB__
809 mov r12, r3, lsr #8
810 #else
811 mov r12, r3, lsl #8
812 #endif
813 ldr r3, [r1, #-4]!
814 #ifdef __ARMEB__
815 orr r12, r12, r3, lsl #24
816 #else
817 orr r12, r12, r3, lsr #24
818 #endif
819 str r12, [r0, #-4]!
820 subs r2, r2, #4
821 bge .Lmemmove_bsrcul3loop4
822
823 .Lmemmove_bsrcul3l4:
824 add r1, r1, #3 /* undo rounding: back to true byte position */
825 b .Lmemmove_bl4
826
827 .Lmemmove_bsrcul2:
828 cmp r2, #0x0c
829 blt .Lmemmove_bsrcul2loop4
830 sub r2, r2, #0x0c
831 stmdb sp!, {r4, r5, lr}
832
833 .Lmemmove_bsrcul2loop16:
834 #ifdef __ARMEB__
835 mov lr, r3, lsr #16
836 #else
837 mov lr, r3, lsl #16
838 #endif
839 ldmdb r1!, {r3-r5, r12}
840 #ifdef __ARMEB__
841 orr lr, lr, r12, lsl #16
842 mov r12, r12, lsr #16
843 orr r12, r12, r5, lsl #16
844 mov r5, r5, lsr #16
845 orr r5, r5, r4, lsl #16
846 mov r4, r4, lsr #16
847 orr r4, r4, r3, lsl #16
848 #else
849 orr lr, lr, r12, lsr #16
850 mov r12, r12, lsl #16
851 orr r12, r12, r5, lsr #16
852 mov r5, r5, lsl #16
853 orr r5, r5, r4, lsr #16
854 mov r4, r4, lsl #16
855 orr r4, r4, r3, lsr #16
856 #endif
857 stmdb r0!, {r4, r5, r12, lr}
858 subs r2, r2, #0x10
859 bge .Lmemmove_bsrcul2loop16
860 ldmia sp!, {r4, r5, lr}
861 adds r2, r2, #0x0c
862 blt .Lmemmove_bsrcul2l4
863
864 .Lmemmove_bsrcul2loop4:
865 #ifdef __ARMEB__
866 mov r12, r3, lsr #16
867 #else
868 mov r12, r3, lsl #16
869 #endif
870 ldr r3, [r1, #-4]!
871 #ifdef __ARMEB__
872 orr r12, r12, r3, lsl #16
873 #else
874 orr r12, r12, r3, lsr #16
875 #endif
876 str r12, [r0, #-4]!
877 subs r2, r2, #4
878 bge .Lmemmove_bsrcul2loop4
879
880 .Lmemmove_bsrcul2l4:
881 add r1, r1, #2 /* undo rounding: back to true byte position */
882 b .Lmemmove_bl4
883
884 .Lmemmove_bsrcul1:
885 cmp r2, #0x0c
886 blt .Lmemmove_bsrcul1loop4
887 sub r2, r2, #0x0c
888 stmdb sp!, {r4, r5, lr}
889
890 .Lmemmove_bsrcul1loop32:
891 #ifdef __ARMEB__
892 mov lr, r3, lsr #24
893 #else
894 mov lr, r3, lsl #24
895 #endif
896 ldmdb r1!, {r3-r5, r12}
897 #ifdef __ARMEB__
898 orr lr, lr, r12, lsl #8
899 mov r12, r12, lsr #24
900 orr r12, r12, r5, lsl #8
901 mov r5, r5, lsr #24
902 orr r5, r5, r4, lsl #8
903 mov r4, r4, lsr #24
904 orr r4, r4, r3, lsl #8
905 #else
906 orr lr, lr, r12, lsr #8
907 mov r12, r12, lsl #24
908 orr r12, r12, r5, lsr #8
909 mov r5, r5, lsl #24
910 orr r5, r5, r4, lsr #8
911 mov r4, r4, lsl #24
912 orr r4, r4, r3, lsr #8
913 #endif
914 stmdb r0!, {r4, r5, r12, lr}
915 subs r2, r2, #0x10
916 bge .Lmemmove_bsrcul1loop32
917 ldmia sp!, {r4, r5, lr}
918 adds r2, r2, #0x0c
919 blt .Lmemmove_bsrcul1l4
920
921 .Lmemmove_bsrcul1loop4:
922 #ifdef __ARMEB__
923 mov r12, r3, lsr #24
924 #else
925 mov r12, r3, lsl #24
926 #endif
927 ldr r3, [r1, #-4]!
928 #ifdef __ARMEB__
929 orr r12, r12, r3, lsl #8
930 #else
931 orr r12, r12, r3, lsr #8
932 #endif
933 str r12, [r0, #-4]!
934 subs r2, r2, #4
935 bge .Lmemmove_bsrcul1loop4
936
937 .Lmemmove_bsrcul1l4:
938 add r1, r1, #1 /* undo rounding: back to true byte position */
939 b .Lmemmove_bl4
939
940 #if !defined(_ARM_ARCH_5E)
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *   r0 = dst, r1 = src, r2 = len; returns dst (pushed on entry to the
 *   inline path and popped into r0 at every exit).
 * Non-ARMv5E variant (the v5E version with pld/strd follows in the
 * #else branch of the surrounding #if).
 *
 * First tries the optional _arm_memcpy platform hook (when installed
 * and len >= _min_memcpy_size, and when not executing from flash —
 * the FLASHADDR/pc comparison skips the hook in that case); a zero
 * return from the hook means it handled the copy.  The inline path
 * mirrors memmove's forward copy: byte-align the destination, then
 * either bulk-copy with ldm/stm when the source is also word-aligned,
 * or reassemble words with shift/OR pairs when the source is offset
 * by 1/2/3 bytes (srcul1/2/3).
 */
941 ENTRY(memcpy)
942 /* save leaf functions having to store this away */
943 /* Do not check arm_memcpy if we're running from flash */
944 #ifdef FLASHADDR
945 #if FLASHADDR > PHYSADDR
946 ldr r3, =FLASHADDR
947 cmp r3, pc
948 bls .Lnormal
949 #else
950 ldr r3, =FLASHADDR
951 cmp r3, pc
952 bhi .Lnormal
953 #endif
954 #endif
955 ldr r3, .L_arm_memcpy
956 ldr r3, [r3] /* hook installed? (0 = no) */
957 cmp r3, #0
958 beq .Lnormal
959 ldr r3, .L_min_memcpy_size
960 ldr r3, [r3]
961 cmp r2, r3 /* len below hook threshold? */
962 blt .Lnormal
963 stmfd sp!, {r0-r2, r4, lr}
964 mov r3, #0
965 ldr r4, .L_arm_memcpy
966 mov lr, pc /* pre-BLX indirect call idiom */
967 ldr pc, [r4]
968 cmp r0, #0 /* hook returned 0 => success */
969 ldmfd sp!, {r0-r2, r4, lr} /* restore args; flags unaffected */
970 RETeq
971
972 .Lnormal:
973 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
974
975 subs r2, r2, #4
976 blt .Lmemcpy_l4 /* less than 4 bytes */
977 ands r12, r0, #3
978 bne .Lmemcpy_destul /* oh unaligned destination addr */
979 ands r12, r1, #3
980 bne .Lmemcpy_srcul /* oh unaligned source addr */
981
982 .Lmemcpy_t8:
983 /* We have aligned source and destination */
984 subs r2, r2, #8
985 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
986 subs r2, r2, #0x14
987 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
988 stmdb sp!, {r4} /* borrow r4 */
989
990 /* blat 32 bytes at a time */
991 /* XXX for really big copies perhaps we should use more registers */
992 .Lmemcpy_loop32:
993 ldmia r1!, {r3, r4, r12, lr}
994 stmia r0!, {r3, r4, r12, lr}
995 ldmia r1!, {r3, r4, r12, lr}
996 stmia r0!, {r3, r4, r12, lr}
997 subs r2, r2, #0x20
998 bge .Lmemcpy_loop32
999
1000 cmn r2, #0x10
1001 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
1002 stmgeia r0!, {r3, r4, r12, lr}
1003 subge r2, r2, #0x10
1004 ldmia sp!, {r4} /* return r4 */
1005
1006 .Lmemcpy_l32:
1007 adds r2, r2, #0x14
1008
1009 /* blat 12 bytes at a time */
1010 .Lmemcpy_loop12:
1011 ldmgeia r1!, {r3, r12, lr}
1012 stmgeia r0!, {r3, r12, lr}
1013 subges r2, r2, #0x0c
1014 bge .Lmemcpy_loop12
1015
1016 .Lmemcpy_l12:
1017 adds r2, r2, #8
1018 blt .Lmemcpy_l4
1019
1020 subs r2, r2, #4
1021 ldrlt r3, [r1], #4
1022 strlt r3, [r0], #4
1023 ldmgeia r1!, {r3, r12}
1024 stmgeia r0!, {r3, r12}
1025 subge r2, r2, #4
1026
1027 .Lmemcpy_l4:
1028 /* less than 4 bytes to go */
1029 adds r2, r2, #4
1030 #ifdef __APCS_26_
1031 ldmeqia sp!, {r0, pc}^ /* done */
1032 #else
1033 ldmeqia sp!, {r0, pc} /* done */
1034 #endif
1035 /* copy the crud byte at a time */
1036 cmp r2, #2
1037 ldrb r3, [r1], #1
1038 strb r3, [r0], #1
1039 ldrgeb r3, [r1], #1
1040 strgeb r3, [r0], #1
1041 ldrgtb r3, [r1], #1
1042 strgtb r3, [r0], #1
1043 ldmia sp!, {r0, pc}
1044
1045 /* erg - unaligned destination */
1046 .Lmemcpy_destul:
1047 rsb r12, r12, #4 /* r12 = bytes to word boundary (1-3) */
1048 cmp r12, #2
1049
1050 /* align destination with byte copies */
1051 ldrb r3, [r1], #1
1052 strb r3, [r0], #1
1053 ldrgeb r3, [r1], #1
1054 strgeb r3, [r0], #1
1055 ldrgtb r3, [r1], #1
1056 strgtb r3, [r0], #1
1057 subs r2, r2, r12
1058 blt .Lmemcpy_l4 /* less the 4 bytes */
1059
1060 ands r12, r1, #3
1061 beq .Lmemcpy_t8 /* we have an aligned source */
1062
1063 /* erg - unaligned source */
1064 /* This is where it gets nasty ... */
1065 .Lmemcpy_srcul:
1066 bic r1, r1, #3 /* round src down to a word boundary */
1067 ldr lr, [r1], #4 /* prime lr with the first partial word */
1068 cmp r12, #2 /* dispatch on misalignment 1/2/3 */
1069 bgt .Lmemcpy_srcul3
1070 beq .Lmemcpy_srcul2
1071 cmp r2, #0x0c
1072 blt .Lmemcpy_srcul1loop4
1073 sub r2, r2, #0x0c
1074 stmdb sp!, {r4, r5}
1075
1076 .Lmemcpy_srcul1loop16:
1077 mov r3, lr, lsr #8
1078 ldmia r1!, {r4, r5, r12, lr}
1079 orr r3, r3, r4, lsl #24
1080 mov r4, r4, lsr #8
1081 orr r4, r4, r5, lsl #24
1082 mov r5, r5, lsr #8
1083 orr r5, r5, r12, lsl #24
1084 mov r12, r12, lsr #8
1085 orr r12, r12, lr, lsl #24
1086 stmia r0!, {r3-r5, r12}
1087 subs r2, r2, #0x10
1088 bge .Lmemcpy_srcul1loop16
1089 ldmia sp!, {r4, r5}
1090 adds r2, r2, #0x0c
1091 blt .Lmemcpy_srcul1l4
1092
1093 .Lmemcpy_srcul1loop4:
1094 mov r12, lr, lsr #8
1095 ldr lr, [r1], #4
1096 orr r12, r12, lr, lsl #24
1097 str r12, [r0], #4
1098 subs r2, r2, #4
1099 bge .Lmemcpy_srcul1loop4
1100
1101 .Lmemcpy_srcul1l4:
1102 sub r1, r1, #3 /* undo rounding: back to true byte position */
1103 b .Lmemcpy_l4
1104
1105 .Lmemcpy_srcul2:
1106 cmp r2, #0x0c
1107 blt .Lmemcpy_srcul2loop4
1108 sub r2, r2, #0x0c
1109 stmdb sp!, {r4, r5}
1110
1111 .Lmemcpy_srcul2loop16:
1112 mov r3, lr, lsr #16
1113 ldmia r1!, {r4, r5, r12, lr}
1114 orr r3, r3, r4, lsl #16
1115 mov r4, r4, lsr #16
1116 orr r4, r4, r5, lsl #16
1117 mov r5, r5, lsr #16
1118 orr r5, r5, r12, lsl #16
1119 mov r12, r12, lsr #16
1120 orr r12, r12, lr, lsl #16
1121 stmia r0!, {r3-r5, r12}
1122 subs r2, r2, #0x10
1123 bge .Lmemcpy_srcul2loop16
1124 ldmia sp!, {r4, r5}
1125 adds r2, r2, #0x0c
1126 blt .Lmemcpy_srcul2l4
1127
1128 .Lmemcpy_srcul2loop4:
1129 mov r12, lr, lsr #16
1130 ldr lr, [r1], #4
1131 orr r12, r12, lr, lsl #16
1132 str r12, [r0], #4
1133 subs r2, r2, #4
1134 bge .Lmemcpy_srcul2loop4
1135
1136 .Lmemcpy_srcul2l4:
1137 sub r1, r1, #2 /* undo rounding: back to true byte position */
1138 b .Lmemcpy_l4
1139
1140 .Lmemcpy_srcul3:
1141 cmp r2, #0x0c
1142 blt .Lmemcpy_srcul3loop4
1143 sub r2, r2, #0x0c
1144 stmdb sp!, {r4, r5}
1145
1146 .Lmemcpy_srcul3loop16:
1147 mov r3, lr, lsr #24
1148 ldmia r1!, {r4, r5, r12, lr}
1149 orr r3, r3, r4, lsl #8
1150 mov r4, r4, lsr #24
1151 orr r4, r4, r5, lsl #8
1152 mov r5, r5, lsr #24
1153 orr r5, r5, r12, lsl #8
1154 mov r12, r12, lsr #24
1155 orr r12, r12, lr, lsl #8
1156 stmia r0!, {r3-r5, r12}
1157 subs r2, r2, #0x10
1158 bge .Lmemcpy_srcul3loop16
1159 ldmia sp!, {r4, r5}
1160 adds r2, r2, #0x0c
1161 blt .Lmemcpy_srcul3l4
1162
1163 .Lmemcpy_srcul3loop4:
1164 mov r12, lr, lsr #24
1165 ldr lr, [r1], #4
1166 orr r12, r12, lr, lsl #8
1167 str r12, [r0], #4
1168 subs r2, r2, #4
1169 bge .Lmemcpy_srcul3loop4
1170
1171 .Lmemcpy_srcul3l4:
1172 sub r1, r1, #1 /* undo rounding: back to true byte position */
1173 b .Lmemcpy_l4
1174 #else
1175 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1176 ENTRY(memcpy)
1177 pld [r1]
1178 cmp r2, #0x0c
1179 ble .Lmemcpy_short /* <= 12 bytes */
1180 #ifdef FLASHADDR
1181 #if FLASHADDR > PHYSADDR
1182 ldr r3, =FLASHADDR
1183 cmp r3, pc
1184 bls .Lnormal
1185 #else
1186 ldr r3, =FLASHADDR
1187 cmp r3, pc
1188 bhi .Lnormal
1189 #endif
1190 #endif
1191 ldr r3, .L_arm_memcpy
1192 ldr r3, [r3]
1193 cmp r3, #0
1194 beq .Lnormal
1195 ldr r3, .L_min_memcpy_size
1196 ldr r3, [r3]
1197 cmp r2, r3
1198 blt .Lnormal
1199 stmfd sp!, {r0-r2, r4, lr}
1200 mov r3, #0
1201 ldr r4, .L_arm_memcpy
1202 mov lr, pc
1203 ldr pc, [r4]
1204 cmp r0, #0
1205 ldmfd sp!, {r0-r2, r4, lr}
1206 RETeq
1207 .Lnormal:
1208 mov r3, r0 /* We must not clobber r0 */
1209
1210 /* Word-align the destination buffer */
1211 ands ip, r3, #0x03 /* Already word aligned? */
1212 beq .Lmemcpy_wordaligned /* Yup */
/*
 * ip = dst & 3 is 1, 2 or 3 here, so 4 - ip bytes must be copied to
 * word-align the destination.  One byte is copied unconditionally;
 * the 'le' trio runs for ip <= 2 (second byte) and the 'lt' trio for
 * ip == 1 (third byte) -- the flags from the single cmp drive all
 * three conditional groups.  len > 12 on this path, so the
 * decrements of r2 cannot underflow.
 */
1213 cmp ip, #0x02
1214 ldrb ip, [r1], #0x01
1215 sub r2, r2, #0x01
1216 strb ip, [r3], #0x01
1217 ldrleb ip, [r1], #0x01
1218 suble r2, r2, #0x01
1219 strleb ip, [r3], #0x01
1220 ldrltb ip, [r1], #0x01
1221 sublt r2, r2, #0x01
1222 strltb ip, [r3], #0x01
1223
1224 /* Destination buffer is now word aligned */
1225 .Lmemcpy_wordaligned:
1226 ands ip, r1, #0x03 /* Is src also word-aligned? */
1227 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1228
1229 /* Quad-align the destination buffer */
/* Copy one word iff dst is 4- but not 8-byte aligned ('ne' group). */
1230 tst r3, #0x07 /* Already quad aligned? */
1231 ldrne ip, [r1], #0x04
1232 stmfd sp!, {r4-r9} /* Free up some registers */
1233 subne r2, r2, #0x04
1234 strne ip, [r3], #0x04
1235
1236 /* Destination buffer quad aligned, source is at least word aligned */
1237 subs r2, r2, #0x80
1238 blt .Lmemcpy_w_lessthan128
1239
1240 /* Copy 128 bytes at a time */
1241 .Lmemcpy_w_loop128:
1242 ldr r4, [r1], #0x04 /* LD:00-03 */
1243 ldr r5, [r1], #0x04 /* LD:04-07 */
1244 pld [r1, #0x18] /* Prefetch 0x20 */
1245 ldr r6, [r1], #0x04 /* LD:08-0b */
1246 ldr r7, [r1], #0x04 /* LD:0c-0f */
1247 ldr r8, [r1], #0x04 /* LD:10-13 */
1248 ldr r9, [r1], #0x04 /* LD:14-17 */
1249 strd r4, [r3], #0x08 /* ST:00-07 */
1250 ldr r4, [r1], #0x04 /* LD:18-1b */
1251 ldr r5, [r1], #0x04 /* LD:1c-1f */
1252 strd r6, [r3], #0x08 /* ST:08-0f */
1253 ldr r6, [r1], #0x04 /* LD:20-23 */
1254 ldr r7, [r1], #0x04 /* LD:24-27 */
1255 pld [r1, #0x18] /* Prefetch 0x40 */
1256 strd r8, [r3], #0x08 /* ST:10-17 */
1257 ldr r8, [r1], #0x04 /* LD:28-2b */
1258 ldr r9, [r1], #0x04 /* LD:2c-2f */
1259 strd r4, [r3], #0x08 /* ST:18-1f */
1260 ldr r4, [r1], #0x04 /* LD:30-33 */
1261 ldr r5, [r1], #0x04 /* LD:34-37 */
1262 strd r6, [r3], #0x08 /* ST:20-27 */
1263 ldr r6, [r1], #0x04 /* LD:38-3b */
1264 ldr r7, [r1], #0x04 /* LD:3c-3f */
1265 strd r8, [r3], #0x08 /* ST:28-2f */
1266 ldr r8, [r1], #0x04 /* LD:40-43 */
1267 ldr r9, [r1], #0x04 /* LD:44-47 */
1268 pld [r1, #0x18] /* Prefetch 0x60 */
1269 strd r4, [r3], #0x08 /* ST:30-37 */
1270 ldr r4, [r1], #0x04 /* LD:48-4b */
1271 ldr r5, [r1], #0x04 /* LD:4c-4f */
1272 strd r6, [r3], #0x08 /* ST:38-3f */
1273 ldr r6, [r1], #0x04 /* LD:50-53 */
1274 ldr r7, [r1], #0x04 /* LD:54-57 */
1275 strd r8, [r3], #0x08 /* ST:40-47 */
1276 ldr r8, [r1], #0x04 /* LD:58-5b */
1277 ldr r9, [r1], #0x04 /* LD:5c-5f */
1278 strd r4, [r3], #0x08 /* ST:48-4f */
1279 ldr r4, [r1], #0x04 /* LD:60-63 */
1280 ldr r5, [r1], #0x04 /* LD:64-67 */
1281 pld [r1, #0x18] /* Prefetch 0x80 */
1282 strd r6, [r3], #0x08 /* ST:50-57 */
1283 ldr r6, [r1], #0x04 /* LD:68-6b */
1284 ldr r7, [r1], #0x04 /* LD:6c-6f */
1285 strd r8, [r3], #0x08 /* ST:58-5f */
1286 ldr r8, [r1], #0x04 /* LD:70-73 */
1287 ldr r9, [r1], #0x04 /* LD:74-77 */
1288 strd r4, [r3], #0x08 /* ST:60-67 */
1289 ldr r4, [r1], #0x04 /* LD:78-7b */
1290 ldr r5, [r1], #0x04 /* LD:7c-7f */
1291 strd r6, [r3], #0x08 /* ST:68-6f */
1292 strd r8, [r3], #0x08 /* ST:70-77 */
1293 subs r2, r2, #0x80
1294 strd r4, [r3], #0x08 /* ST:78-7f */
1295 bge .Lmemcpy_w_loop128
1296
1297 .Lmemcpy_w_lessthan128:
1298 adds r2, r2, #0x80 /* Adjust for extra sub */
1299 ldmeqfd sp!, {r4-r9}
1300 RETeq /* Return now if done */
1301 subs r2, r2, #0x20
1302 blt .Lmemcpy_w_lessthan32
1303
1304 /* Copy 32 bytes at a time */
1305 .Lmemcpy_w_loop32:
1306 ldr r4, [r1], #0x04
1307 ldr r5, [r1], #0x04
1308 pld [r1, #0x18]
1309 ldr r6, [r1], #0x04
1310 ldr r7, [r1], #0x04
1311 ldr r8, [r1], #0x04
1312 ldr r9, [r1], #0x04
1313 strd r4, [r3], #0x08
1314 ldr r4, [r1], #0x04
1315 ldr r5, [r1], #0x04
1316 strd r6, [r3], #0x08
1317 strd r8, [r3], #0x08
1318 subs r2, r2, #0x20
1319 strd r4, [r3], #0x08
1320 bge .Lmemcpy_w_loop32
1321
1322 .Lmemcpy_w_lessthan32:
1323 adds r2, r2, #0x20 /* Adjust for extra sub */
1324 ldmeqfd sp!, {r4-r9}
1325 RETeq /* Return now if done */
1326
1327 and r4, r2, #0x18
1328 rsbs r4, r4, #0x18
1329 addne pc, pc, r4, lsl #1
1330 nop
1331
1332 /* At least 24 bytes remaining */
1333 ldr r4, [r1], #0x04
1334 ldr r5, [r1], #0x04
1335 sub r2, r2, #0x08
1336 strd r4, [r3], #0x08
1337
1338 /* At least 16 bytes remaining */
1339 ldr r4, [r1], #0x04
1340 ldr r5, [r1], #0x04
1341 sub r2, r2, #0x08
1342 strd r4, [r3], #0x08
1343
1344 /* At least 8 bytes remaining */
1345 ldr r4, [r1], #0x04
1346 ldr r5, [r1], #0x04
1347 subs r2, r2, #0x08
1348 strd r4, [r3], #0x08
1349
1350 /* Less than 8 bytes remaining */
1351 ldmfd sp!, {r4-r9}
1352 RETeq /* Return now if done */
1353 subs r2, r2, #0x04
1354 ldrge ip, [r1], #0x04
1355 strge ip, [r3], #0x04
1356 RETeq /* Return now if done */
1357 addlt r2, r2, #0x04
1358 ldrb ip, [r1], #0x01
1359 cmp r2, #0x02
1360 ldrgeb r2, [r1], #0x01
1361 strb ip, [r3], #0x01
1362 ldrgtb ip, [r1]
1363 strgeb r2, [r3], #0x01
1364 strgtb ip, [r3]
1365 RET
1366
1367
1368 /*
1369 * At this point, it has not been possible to word align both buffers.
1370 * The destination buffer is word aligned, but the source buffer is not.
1371 */
1372 .Lmemcpy_bad_align:
/*
 * On entry ip = src & 3 (1, 2 or 3).  Round src down to a word
 * boundary and pre-load the first aligned word; the .Lmemcpy_badN
 * paths below then merge successive aligned words with shifts/ORs
 * to reconstruct the misaligned source stream.
 */
1373 stmfd sp!, {r4-r7}
1374 bic r1, r1, #0x03 /* r1 = src rounded down to a word */
1375 cmp ip, #2
1376 ldr ip, [r1], #0x04 /* pre-load first aligned word */
1377 bgt .Lmemcpy_bad3 /* src & 3 == 3 */
1378 beq .Lmemcpy_bad2 /* src & 3 == 2 */
1379 b .Lmemcpy_bad1 /* src & 3 == 1 */
1380
1381 .Lmemcpy_bad1_loop16:
1382 #ifdef __ARMEB__
1383 mov r4, ip, lsl #8
1384 #else
1385 mov r4, ip, lsr #8
1386 #endif
1387 ldr r5, [r1], #0x04
1388 pld [r1, #0x018]
1389 ldr r6, [r1], #0x04
1390 ldr r7, [r1], #0x04
1391 ldr ip, [r1], #0x04
1392 #ifdef __ARMEB__
1393 orr r4, r4, r5, lsr #24
1394 mov r5, r5, lsl #8
1395 orr r5, r5, r6, lsr #24
1396 mov r6, r6, lsl #8
1397 orr r6, r6, r7, lsr #24
1398 mov r7, r7, lsl #8
1399 orr r7, r7, ip, lsr #24
1400 #else
1401 orr r4, r4, r5, lsl #24
1402 mov r5, r5, lsr #8
1403 orr r5, r5, r6, lsl #24
1404 mov r6, r6, lsr #8
1405 orr r6, r6, r7, lsl #24
1406 mov r7, r7, lsr #8
1407 orr r7, r7, ip, lsl #24
1408 #endif
1409 str r4, [r3], #0x04
1410 str r5, [r3], #0x04
1411 str r6, [r3], #0x04
1412 str r7, [r3], #0x04
1413 .Lmemcpy_bad1:
1414 subs r2, r2, #0x10
1415 bge .Lmemcpy_bad1_loop16
1416
1417 adds r2, r2, #0x10
1418 ldmeqfd sp!, {r4-r7}
1419 RETeq /* Return now if done */
1420 subs r2, r2, #0x04
1421 sublt r1, r1, #0x03
1422 blt .Lmemcpy_bad_done
1423
1424 .Lmemcpy_bad1_loop4:
1425 #ifdef __ARMEB__
1426 mov r4, ip, lsl #8
1427 #else
1428 mov r4, ip, lsr #8
1429 #endif
1430 ldr ip, [r1], #0x04
1431 subs r2, r2, #0x04
1432 #ifdef __ARMEB__
1433 orr r4, r4, ip, lsr #24
1434 #else
1435 orr r4, r4, ip, lsl #24
1436 #endif
1437 str r4, [r3], #0x04
1438 bge .Lmemcpy_bad1_loop4
1439 sub r1, r1, #0x03
1440 b .Lmemcpy_bad_done
1441
1442 .Lmemcpy_bad2_loop16:
1443 #ifdef __ARMEB__
1444 mov r4, ip, lsl #16
1445 #else
1446 mov r4, ip, lsr #16
1447 #endif
1448 ldr r5, [r1], #0x04
1449 pld [r1, #0x018]
1450 ldr r6, [r1], #0x04
1451 ldr r7, [r1], #0x04
1452 ldr ip, [r1], #0x04
1453 #ifdef __ARMEB__
1454 orr r4, r4, r5, lsr #16
1455 mov r5, r5, lsl #16
1456 orr r5, r5, r6, lsr #16
1457 mov r6, r6, lsl #16
1458 orr r6, r6, r7, lsr #16
1459 mov r7, r7, lsl #16
1460 orr r7, r7, ip, lsr #16
1461 #else
1462 orr r4, r4, r5, lsl #16
1463 mov r5, r5, lsr #16
1464 orr r5, r5, r6, lsl #16
1465 mov r6, r6, lsr #16
1466 orr r6, r6, r7, lsl #16
1467 mov r7, r7, lsr #16
1468 orr r7, r7, ip, lsl #16
1469 #endif
1470 str r4, [r3], #0x04
1471 str r5, [r3], #0x04
1472 str r6, [r3], #0x04
1473 str r7, [r3], #0x04
1474 .Lmemcpy_bad2:
1475 subs r2, r2, #0x10
1476 bge .Lmemcpy_bad2_loop16
1477
1478 adds r2, r2, #0x10
1479 ldmeqfd sp!, {r4-r7}
1480 RETeq /* Return now if done */
1481 subs r2, r2, #0x04
1482 sublt r1, r1, #0x02
1483 blt .Lmemcpy_bad_done
1484
1485 .Lmemcpy_bad2_loop4:
1486 #ifdef __ARMEB__
1487 mov r4, ip, lsl #16
1488 #else
1489 mov r4, ip, lsr #16
1490 #endif
1491 ldr ip, [r1], #0x04
1492 subs r2, r2, #0x04
1493 #ifdef __ARMEB__
1494 orr r4, r4, ip, lsr #16
1495 #else
1496 orr r4, r4, ip, lsl #16
1497 #endif
1498 str r4, [r3], #0x04
1499 bge .Lmemcpy_bad2_loop4
1500 sub r1, r1, #0x02
1501 b .Lmemcpy_bad_done
1502
1503 .Lmemcpy_bad3_loop16:
1504 #ifdef __ARMEB__
1505 mov r4, ip, lsl #24
1506 #else
1507 mov r4, ip, lsr #24
1508 #endif
1509 ldr r5, [r1], #0x04
1510 pld [r1, #0x018]
1511 ldr r6, [r1], #0x04
1512 ldr r7, [r1], #0x04
1513 ldr ip, [r1], #0x04
1514 #ifdef __ARMEB__
1515 orr r4, r4, r5, lsr #8
1516 mov r5, r5, lsl #24
1517 orr r5, r5, r6, lsr #8
1518 mov r6, r6, lsl #24
1519 orr r6, r6, r7, lsr #8
1520 mov r7, r7, lsl #24
1521 orr r7, r7, ip, lsr #8
1522 #else
1523 orr r4, r4, r5, lsl #8
1524 mov r5, r5, lsr #24
1525 orr r5, r5, r6, lsl #8
1526 mov r6, r6, lsr #24
1527 orr r6, r6, r7, lsl #8
1528 mov r7, r7, lsr #24
1529 orr r7, r7, ip, lsl #8
1530 #endif
1531 str r4, [r3], #0x04
1532 str r5, [r3], #0x04
1533 str r6, [r3], #0x04
1534 str r7, [r3], #0x04
1535 .Lmemcpy_bad3:
1536 subs r2, r2, #0x10
1537 bge .Lmemcpy_bad3_loop16
1538
1539 adds r2, r2, #0x10
1540 ldmeqfd sp!, {r4-r7}
1541 RETeq /* Return now if done */
1542 subs r2, r2, #0x04
1543 sublt r1, r1, #0x01
1544 blt .Lmemcpy_bad_done
1545
1546 .Lmemcpy_bad3_loop4:
1547 #ifdef __ARMEB__
1548 mov r4, ip, lsl #24
1549 #else
1550 mov r4, ip, lsr #24
1551 #endif
1552 ldr ip, [r1], #0x04
1553 subs r2, r2, #0x04
1554 #ifdef __ARMEB__
1555 orr r4, r4, ip, lsr #8
1556 #else
1557 orr r4, r4, ip, lsl #8
1558 #endif
1559 str r4, [r3], #0x04
1560 bge .Lmemcpy_bad3_loop4
1561 sub r1, r1, #0x01
1562
1563 .Lmemcpy_bad_done:
1564 ldmfd sp!, {r4-r7}
1565 adds r2, r2, #0x04
1566 RETeq
1567 ldrb ip, [r1], #0x01
1568 cmp r2, #0x02
1569 ldrgeb r2, [r1], #0x01
1570 strb ip, [r3], #0x01
1571 ldrgtb ip, [r1]
1572 strgeb r2, [r3], #0x01
1573 strgtb ip, [r3]
1574 RET
1575
1576
1577 /*
1578 * Handle short copies (less than 16 bytes), possibly misaligned.
1579 * Some of these are *very* common, thanks to the network stack,
1580 * and so are handled specially.
1581 */
1582 .Lmemcpy_short:
/*
 * len <= 12 (r2 = len).  'add pc, pc, r2, lsl #2' dispatches through
 * the branch table below: in ARM state pc reads as the address of
 * the current instruction + 8, which (skipping the nop) is the table
 * base, so table entry number 'len' is executed.  len == 0 lands on
 * the bare RET; 4, 6, 8 and 12 get dedicated fast paths.
 */
1583 add pc, pc, r2, lsl #2
1584 nop
1585 RET /* 0x00 */
1586 b .Lmemcpy_bytewise /* 0x01 */
1587 b .Lmemcpy_bytewise /* 0x02 */
1588 b .Lmemcpy_bytewise /* 0x03 */
1589 b .Lmemcpy_4 /* 0x04 */
1590 b .Lmemcpy_bytewise /* 0x05 */
1591 b .Lmemcpy_6 /* 0x06 */
1592 b .Lmemcpy_bytewise /* 0x07 */
1593 b .Lmemcpy_8 /* 0x08 */
1594 b .Lmemcpy_bytewise /* 0x09 */
1595 b .Lmemcpy_bytewise /* 0x0a */
1596 b .Lmemcpy_bytewise /* 0x0b */
1597 b .Lmemcpy_c /* 0x0c */
/* Byte-at-a-time copy for the remaining short lengths (r2 >= 1). */
1598 .Lmemcpy_bytewise:
1599 mov r3, r0 /* We must not clobber r0 */
1600 ldrb ip, [r1], #0x01
1601 1: subs r2, r2, #0x01
1602 strb ip, [r3], #0x01
1603 ldrneb ip, [r1], #0x01
1604 bne 1b
1605 RET
1606
1607 /******************************************************************************
1608 * Special case for 4 byte copies
1609 */
1610 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1611 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1612 LMEMCPY_4_PAD
1613 .Lmemcpy_4:
/*
 * 4-byte copy.  Build a 4-bit case index from the alignment bits of
 * both pointers: r2 = ((dst & 3) << 2) | (src & 3).  Each of the 16
 * cases occupies its own 64-byte slot (LMEMCPY_4_PAD), so the target
 * is .Lmemcpy_4 + (case << LMEMCPY_4_LOG2).  pc reads as '.' + 8 in
 * ARM state, hence 'pc - 0x14' is the address of .Lmemcpy_4 itself.
 * Case 0 (both word-aligned) simply falls through ('addne' skipped
 * when the ands set Z).
 */
1614 and r2, r1, #0x03
1615 orr r2, r2, r0, lsl #2
1616 ands r2, r2, #0x0f
1617 sub r3, pc, #0x14
1618 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1619
1620 /*
1621 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1622 */
1623 ldr r2, [r1]
1624 str r2, [r0]
1625 RET
1626 LMEMCPY_4_PAD
1627
1628 /*
1629 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1630 */
1631 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1632 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1633 #ifdef __ARMEB__
1634 mov r3, r3, lsl #8 /* r3 = 012. */
1635 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1636 #else
1637 mov r3, r3, lsr #8 /* r3 = .210 */
1638 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1639 #endif
1640 str r3, [r0]
1641 RET
1642 LMEMCPY_4_PAD
1643
1644 /*
1645 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1646 */
1647 #ifdef __ARMEB__
1648 ldrh r3, [r1]
1649 ldrh r2, [r1, #0x02]
1650 #else
1651 ldrh r3, [r1, #0x02]
1652 ldrh r2, [r1]
1653 #endif
1654 orr r3, r2, r3, lsl #16
1655 str r3, [r0]
1656 RET
1657 LMEMCPY_4_PAD
1658
1659 /*
1660 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1661 */
1662 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1663 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1664 #ifdef __ARMEB__
1665 mov r3, r3, lsl #24 /* r3 = 0... */
1666 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1667 #else
1668 mov r3, r3, lsr #24 /* r3 = ...0 */
1669 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1670 #endif
1671 str r3, [r0]
1672 RET
1673 LMEMCPY_4_PAD
1674
1675 /*
1676 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1677 */
1678 ldr r2, [r1]
1679 #ifdef __ARMEB__
1680 strb r2, [r0, #0x03]
1681 mov r3, r2, lsr #8
1682 mov r1, r2, lsr #24
1683 strb r1, [r0]
1684 #else
1685 strb r2, [r0]
1686 mov r3, r2, lsr #8
1687 mov r1, r2, lsr #24
1688 strb r1, [r0, #0x03]
1689 #endif
1690 strh r3, [r0, #0x01]
1691 RET
1692 LMEMCPY_4_PAD
1693
1694 /*
1695 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1696 */
1697 ldrb r2, [r1]
1698 ldrh r3, [r1, #0x01]
1699 ldrb r1, [r1, #0x03]
1700 strb r2, [r0]
1701 strh r3, [r0, #0x01]
1702 strb r1, [r0, #0x03]
1703 RET
1704 LMEMCPY_4_PAD
1705
1706 /*
1707 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1708 */
1709 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1710 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1711 #ifdef __ARMEB__
1712 mov r1, r2, lsr #8 /* r1 = ...0 */
1713 strb r1, [r0]
1714 mov r2, r2, lsl #8 /* r2 = .01. */
1715 orr r2, r2, r3, lsr #8 /* r2 = .012 */
1716 #else
1717 strb r2, [r0]
1718 mov r2, r2, lsr #8 /* r2 = ...1 */
1719 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1720 mov r3, r3, lsr #8 /* r3 = ...3 */
1721 #endif
1722 strh r2, [r0, #0x01]
1723 strb r3, [r0, #0x03]
1724 RET
1725 LMEMCPY_4_PAD
1726
1727 /*
1728 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1729 */
1730 ldrb r2, [r1]
1731 ldrh r3, [r1, #0x01]
1732 ldrb r1, [r1, #0x03]
1733 strb r2, [r0]
1734 strh r3, [r0, #0x01]
1735 strb r1, [r0, #0x03]
1736 RET
1737 LMEMCPY_4_PAD
1738
1739 /*
1740 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1741 */
1742 ldr r2, [r1]
1743 #ifdef __ARMEB__
1744 strh r2, [r0, #0x02]
1745 mov r3, r2, lsr #16
1746 strh r3, [r0]
1747 #else
1748 strh r2, [r0]
1749 mov r3, r2, lsr #16
1750 strh r3, [r0, #0x02]
1751 #endif
1752 RET
1753 LMEMCPY_4_PAD
1754
1755 /*
1756 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1757 */
1758 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1759 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1760 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1761 strh r1, [r0]
1762 #ifdef __ARMEB__
1763 mov r2, r2, lsl #8 /* r2 = 012. */
1764 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1765 #else
1766 mov r2, r2, lsr #24 /* r2 = ...2 */
1767 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1768 #endif
1769 strh r2, [r0, #0x02]
1770 RET
1771 LMEMCPY_4_PAD
1772
1773 /*
1774 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1775 */
1776 ldrh r2, [r1]
1777 ldrh r3, [r1, #0x02]
1778 strh r2, [r0]
1779 strh r3, [r0, #0x02]
1780 RET
1781 LMEMCPY_4_PAD
1782
1783 /*
1784 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1785 */
1786 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1787 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1788 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1789 strh r1, [r0, #0x02]
1790 #ifdef __ARMEB__
1791 mov r3, r3, lsr #24 /* r3 = ...1 */
1792 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1793 #else
1794 mov r3, r3, lsl #8 /* r3 = 321. */
1795 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1796 #endif
1797 strh r3, [r0]
1798 RET
1799 LMEMCPY_4_PAD
1800
1801 /*
1802 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1803 */
1804 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1805 #ifdef __ARMEB__
1806 strb r2, [r0, #0x03]
1807 mov r3, r2, lsr #8
1808 mov r1, r2, lsr #24
1809 strh r3, [r0, #0x01]
1810 strb r1, [r0]
1811 #else
1812 strb r2, [r0]
1813 mov r3, r2, lsr #8
1814 mov r1, r2, lsr #24
1815 strh r3, [r0, #0x01]
1816 strb r1, [r0, #0x03]
1817 #endif
1818 RET
1819 LMEMCPY_4_PAD
1820
1821 /*
1822 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1823 */
1824 ldrb r2, [r1]
1825 ldrh r3, [r1, #0x01]
1826 ldrb r1, [r1, #0x03]
1827 strb r2, [r0]
1828 strh r3, [r0, #0x01]
1829 strb r1, [r0, #0x03]
1830 RET
1831 LMEMCPY_4_PAD
1832
1833 /*
1834 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1835 */
1836 #ifdef __ARMEB__
1837 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1838 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1839 strb r3, [r0, #0x03]
1840 mov r3, r3, lsr #8 /* r3 = ...2 */
1841 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1842 strh r3, [r0, #0x01]
1843 mov r2, r2, lsr #8 /* r2 = ...0 */
1844 strb r2, [r0]
1845 #else
1846 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1847 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1848 strb r2, [r0]
1849 mov r2, r2, lsr #8 /* r2 = ...1 */
1850 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1851 strh r2, [r0, #0x01]
1852 mov r3, r3, lsr #8 /* r3 = ...3 */
1853 strb r3, [r0, #0x03]
1854 #endif
1855 RET
1856 LMEMCPY_4_PAD
1857
1858 /*
1859 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1860 */
1861 ldrb r2, [r1]
1862 ldrh r3, [r1, #0x01]
1863 ldrb r1, [r1, #0x03]
1864 strb r2, [r0]
1865 strh r3, [r0, #0x01]
1866 strb r1, [r0, #0x03]
1867 RET
1868 LMEMCPY_4_PAD
1869
1870
1871 /******************************************************************************
1872 * Special case for 6 byte copies
1873 */
1874 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1875 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1876 LMEMCPY_6_PAD
1877 .Lmemcpy_6:
/*
 * 6-byte copy.  Same dispatch scheme as .Lmemcpy_4:
 * r2 = ((dst & 3) << 2) | (src & 3) selects one of 16 64-byte
 * slots at .Lmemcpy_6 + (case << LMEMCPY_6_LOG2).  pc reads as
 * '.' + 8, so 'pc - 0x14' is the address of .Lmemcpy_6; case 0
 * falls through because 'addne' is skipped when Z is set.
 */
1878 and r2, r1, #0x03
1879 orr r2, r2, r0, lsl #2
1880 ands r2, r2, #0x0f
1881 sub r3, pc, #0x14
1882 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1883
1884 /*
1885 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1886 */
1887 ldr r2, [r1]
1888 ldrh r3, [r1, #0x04]
1889 str r2, [r0]
1890 strh r3, [r0, #0x04]
1891 RET
1892 LMEMCPY_6_PAD
1893
1894 /*
1895 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1896 */
1897 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1898 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1899 #ifdef __ARMEB__
1900 mov r2, r2, lsl #8 /* r2 = 012. */
1901 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1902 #else
1903 mov r2, r2, lsr #8 /* r2 = .210 */
1904 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1905 #endif
1906 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1907 str r2, [r0]
1908 strh r3, [r0, #0x04]
1909 RET
1910 LMEMCPY_6_PAD
1911
1912 /*
1913 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1914 */
1915 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1916 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1917 #ifdef __ARMEB__
1918 mov r1, r3, lsr #16 /* r1 = ..23 */
1919 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1920 str r1, [r0]
1921 strh r3, [r0, #0x04]
1922 #else
1923 mov r1, r3, lsr #16 /* r1 = ..54 */
1924 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1925 str r2, [r0]
1926 strh r1, [r0, #0x04]
1927 #endif
1928 RET
1929 LMEMCPY_6_PAD
1930
1931 /*
1932 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1933 */
/*
 * NOTE(review): the word loads below start up to 3 bytes before src
 * and end up to 3 bytes past src + 6, but each stays within the
 * naturally aligned word that contains a buffer byte, so they cannot
 * fault on a page the buffer itself does not touch.
 */
1934 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1935 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1936 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
1937 #ifdef __ARMEB__
1938 mov r2, r2, lsl #24 /* r2 = 0... */
1939 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1940 mov r3, r3, lsl #8 /* r3 = 234. */
1941 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
1942 #else
1943 mov r2, r2, lsr #24 /* r2 = ...0 */
1944 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1945 mov r1, r1, lsl #8 /* r1 = xx5. */
1946 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1947 #endif
1948 str r2, [r0]
1949 strh r1, [r0, #0x04]
1950 RET
1951 LMEMCPY_6_PAD
1952
1953 /*
1954 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1955 */
1956 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1957 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1958 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1959 strh r1, [r0, #0x01]
1960 #ifdef __ARMEB__
1961 mov r1, r3, lsr #24 /* r1 = ...0 */
1962 strb r1, [r0]
1963 mov r3, r3, lsl #8 /* r3 = 123. */
1964 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1965 #else
1966 strb r3, [r0]
1967 mov r3, r3, lsr #24 /* r3 = ...3 */
1968 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1969 mov r2, r2, lsr #8 /* r2 = ...5 */
1970 #endif
1971 strh r3, [r0, #0x03]
1972 strb r2, [r0, #0x05]
1973 RET
1974 LMEMCPY_6_PAD
1975
1976 /*
1977 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1978 */
1979 ldrb r2, [r1]
1980 ldrh r3, [r1, #0x01]
1981 ldrh ip, [r1, #0x03]
1982 ldrb r1, [r1, #0x05]
1983 strb r2, [r0]
1984 strh r3, [r0, #0x01]
1985 strh ip, [r0, #0x03]
1986 strb r1, [r0, #0x05]
1987 RET
1988 LMEMCPY_6_PAD
1989
1990 /*
1991 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1992 */
1993 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1994 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1995 #ifdef __ARMEB__
1996 mov r3, r2, lsr #8 /* r3 = ...0 */
1997 strb r3, [r0]
1998 strb r1, [r0, #0x05]
1999 mov r3, r1, lsr #8 /* r3 = .234 */
2000 strh r3, [r0, #0x03]
2001 mov r3, r2, lsl #8 /* r3 = .01. */
2002 orr r3, r3, r1, lsr #24 /* r3 = .012 */
2003 strh r3, [r0, #0x01]
2004 #else
2005 strb r2, [r0]
2006 mov r3, r1, lsr #24
2007 strb r3, [r0, #0x05]
2008 mov r3, r1, lsr #8 /* r3 = .543 */
2009 strh r3, [r0, #0x03]
2010 mov r3, r2, lsr #8 /* r3 = ...1 */
2011 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
2012 strh r3, [r0, #0x01]
2013 #endif
2014 RET
2015 LMEMCPY_6_PAD
2016
2017 /*
2018 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2019 */
2020 ldrb r2, [r1]
2021 ldrh r3, [r1, #0x01]
2022 ldrh ip, [r1, #0x03]
2023 ldrb r1, [r1, #0x05]
2024 strb r2, [r0]
2025 strh r3, [r0, #0x01]
2026 strh ip, [r0, #0x03]
2027 strb r1, [r0, #0x05]
2028 RET
2029 LMEMCPY_6_PAD
2030
2031 /*
2032 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2033 */
2034 #ifdef __ARMEB__
2035 ldr r2, [r1] /* r2 = 0123 */
2036 ldrh r3, [r1, #0x04] /* r3 = ..45 */
2037 mov r1, r2, lsr #16 /* r1 = ..01 */
2038 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
2039 strh r1, [r0]
2040 str r3, [r0, #0x02]
2041 #else
2042 ldrh r2, [r1, #0x04] /* r2 = ..54 */
2043 ldr r3, [r1] /* r3 = 3210 */
2044 mov r2, r2, lsl #16 /* r2 = 54.. */
2045 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
2046 strh r3, [r0]
2047 str r2, [r0, #0x02]
2048 #endif
2049 RET
2050 LMEMCPY_6_PAD
2051
2052 /*
2053 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2054 */
2055 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2056 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
2057 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2058 #ifdef __ARMEB__
2059 mov r2, r2, lsr #8 /* r2 = .345 */
2060 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
2061 #else
2062 mov r2, r2, lsl #8 /* r2 = 543. */
2063 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
2064 #endif
2065 strh r1, [r0]
2066 str r2, [r0, #0x02]
2067 RET
2068 LMEMCPY_6_PAD
2069
2070 /*
2071 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2072 */
2073 ldrh r2, [r1]
2074 ldr r3, [r1, #0x02]
2075 strh r2, [r0]
2076 str r3, [r0, #0x02]
2077 RET
2078 LMEMCPY_6_PAD
2079
2080 /*
2081 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2082 */
2083 ldrb r3, [r1] /* r3 = ...0 */
2084 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2085 ldrb r1, [r1, #0x05] /* r1 = ...5 */
2086 #ifdef __ARMEB__
2087 mov r3, r3, lsl #8 /* r3 = ..0. */
2088 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
2089 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
2090 #else
2091 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2092 mov r1, r1, lsl #24 /* r1 = 5... */
2093 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
2094 #endif
2095 strh r3, [r0]
2096 str r1, [r0, #0x02]
2097 RET
2098 LMEMCPY_6_PAD
2099
2100 /*
2101 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2102 */
2103 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2104 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
2105 #ifdef __ARMEB__
2106 mov r3, r2, lsr #24 /* r3 = ...0 */
2107 strb r3, [r0]
2108 mov r2, r2, lsl #8 /* r2 = 123. */
2109 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2110 #else
2111 strb r2, [r0]
2112 mov r2, r2, lsr #8 /* r2 = .321 */
2113 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
2114 mov r1, r1, lsr #8 /* r1 = ...5 */
2115 #endif
2116 str r2, [r0, #0x01]
2117 strb r1, [r0, #0x05]
2118 RET
2119 LMEMCPY_6_PAD
2120
2121 /*
2122 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2123 */
2124 ldrb r2, [r1]
2125 ldrh r3, [r1, #0x01]
2126 ldrh ip, [r1, #0x03]
2127 ldrb r1, [r1, #0x05]
2128 strb r2, [r0]
2129 strh r3, [r0, #0x01]
2130 strh ip, [r0, #0x03]
2131 strb r1, [r0, #0x05]
2132 RET
2133 LMEMCPY_6_PAD
2134
2135 /*
2136 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2137 */
2138 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2139 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
2140 #ifdef __ARMEB__
2141 mov r3, r2, lsr #8 /* r3 = ...0 */
2142 strb r3, [r0]
2143 mov r2, r2, lsl #24 /* r2 = 1... */
2144 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2145 #else
2146 strb r2, [r0]
2147 mov r2, r2, lsr #8 /* r2 = ...1 */
2148 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
2149 mov r1, r1, lsr #24 /* r1 = ...5 */
2150 #endif
2151 str r2, [r0, #0x01]
2152 strb r1, [r0, #0x05]
2153 RET
2154 LMEMCPY_6_PAD
2155
2156 /*
2157 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2158 */
2159 ldrb r2, [r1]
2160 ldr r3, [r1, #0x01]
2161 ldrb r1, [r1, #0x05]
2162 strb r2, [r0]
2163 str r3, [r0, #0x01]
2164 strb r1, [r0, #0x05]
2165 RET
2166 LMEMCPY_6_PAD
2167
2168
2169 /******************************************************************************
2170 * Special case for 8 byte copies
2171 */
2172 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2173 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2174 LMEMCPY_8_PAD
2175 .Lmemcpy_8:
/*
 * 8-byte copy.  Same dispatch scheme as .Lmemcpy_4:
 * r2 = ((dst & 3) << 2) | (src & 3) selects one of 16 64-byte
 * slots at .Lmemcpy_8 + (case << LMEMCPY_8_LOG2).  pc reads as
 * '.' + 8, so 'pc - 0x14' is the address of .Lmemcpy_8; case 0
 * falls through because 'addne' is skipped when Z is set.
 */
2176 and r2, r1, #0x03
2177 orr r2, r2, r0, lsl #2
2178 ands r2, r2, #0x0f
2179 sub r3, pc, #0x14
2180 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2181
2182 /*
2183 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2184 */
2185 ldr r2, [r1]
2186 ldr r3, [r1, #0x04]
2187 str r2, [r0]
2188 str r3, [r0, #0x04]
2189 RET
2190 LMEMCPY_8_PAD
2191
2192 /*
2193 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2194 */
2195 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2196 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2197 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2198 #ifdef __ARMEB__
2199 mov r3, r3, lsl #8 /* r3 = 012. */
2200 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2201 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2202 #else
2203 mov r3, r3, lsr #8 /* r3 = .210 */
2204 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2205 mov r1, r1, lsl #24 /* r1 = 7... */
2206 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2207 #endif
2208 str r3, [r0]
2209 str r2, [r0, #0x04]
2210 RET
2211 LMEMCPY_8_PAD
2212
2213 /*
2214 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2215 */
2216 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2217 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2218 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2219 #ifdef __ARMEB__
2220 mov r2, r2, lsl #16 /* r2 = 01.. */
2221 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2222 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2223 #else
2224 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2225 mov r3, r3, lsr #16 /* r3 = ..54 */
2226 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2227 #endif
2228 str r2, [r0]
2229 str r3, [r0, #0x04]
2230 RET
2231 LMEMCPY_8_PAD
2232
2233 /*
2234 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2235 */
2236 ldrb r3, [r1] /* r3 = ...0 */
2237 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2238 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2239 #ifdef __ARMEB__
2240 mov r3, r3, lsl #24 /* r3 = 0... */
2241 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2242 mov r2, r2, lsl #24 /* r2 = 4... */
2243 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2244 #else
2245 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2246 mov r2, r2, lsr #24 /* r2 = ...4 */
2247 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2248 #endif
2249 str r3, [r0]
2250 str r2, [r0, #0x04]
2251 RET
2252 LMEMCPY_8_PAD
2253
2254 /*
2255 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2256 */
2257 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2258 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2259 #ifdef __ARMEB__
2260 mov r1, r3, lsr #24 /* r1 = ...0 */
2261 strb r1, [r0]
2262 mov r1, r3, lsr #8 /* r1 = .012 */
2263 strb r2, [r0, #0x07]
2264 mov r3, r3, lsl #24 /* r3 = 3... */
2265 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2266 #else
2267 strb r3, [r0]
2268 mov r1, r2, lsr #24 /* r1 = ...7 */
2269 strb r1, [r0, #0x07]
2270 mov r1, r3, lsr #8 /* r1 = .321 */
2271 mov r3, r3, lsr #24 /* r3 = ...3 */
2272 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2273 #endif
2274 strh r1, [r0, #0x01]
2275 str r3, [r0, #0x03]
2276 RET
2277 LMEMCPY_8_PAD
2278
2279 /*
2280 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2281 */
2282 ldrb r2, [r1]
2283 ldrh r3, [r1, #0x01]
2284 ldr ip, [r1, #0x03]
2285 ldrb r1, [r1, #0x07]
2286 strb r2, [r0]
2287 strh r3, [r0, #0x01]
2288 str ip, [r0, #0x03]
2289 strb r1, [r0, #0x07]
2290 RET
2291 LMEMCPY_8_PAD
2292
2293 /*
2294 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2295 */
2296 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2297 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2298 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2299 #ifdef __ARMEB__
2300 mov ip, r2, lsr #8 /* ip = ...0 */
2301 strb ip, [r0]
2302 mov ip, r2, lsl #8 /* ip = .01. */
2303 orr ip, ip, r3, lsr #24 /* ip = .012 */
2304 strb r1, [r0, #0x07]
2305 mov r3, r3, lsl #8 /* r3 = 345. */
2306 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2307 #else
2308 strb r2, [r0] /* 0 */
2309 mov ip, r1, lsr #8 /* ip = ...7 */
2310 strb ip, [r0, #0x07] /* 7 */
2311 mov ip, r2, lsr #8 /* ip = ...1 */
2312 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2313 mov r3, r3, lsr #8 /* r3 = .543 */
2314 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2315 #endif
2316 strh ip, [r0, #0x01]
2317 str r3, [r0, #0x03]
2318 RET
/*
 * Tail of the .Lmemcpy_8 special case (8-byte copies): handlers for
 * alignment cases 0111 through 1111.  The dispatcher (above this
 * chunk) reaches each handler by a computed jump into fixed-size,
 * LMEMCPY_8_PAD-aligned slots, so a handler must not grow past its
 * slot; do not add instructions without rechecking the pad size.
 * NOTE(review): the entry/dispatch code and LMEMCPY_8_LOG2 are
 * outside this view -- confirm the slot size there before editing.
 *
 * Conventions in the cases below: r0 = dst, r1 = src.  Digits 0-7 in
 * the comments name the eight source bytes; "BE:"/"LE:" show the byte
 * order a register holds on big-/little-endian builds.  Each handler
 * ends with RET; LMEMCPY_8_PAD aligns the start of the next slot.
 */
2319 LMEMCPY_8_PAD
2320
2321 /*
2322 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2323 */
2324 ldrb r3, [r1] /* r3 = ...0 */
2325 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2326 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2327 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2328 strb r3, [r0]
2329 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2330 #ifdef __ARMEB__
2331 strh r3, [r0, #0x01]
2332 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2333 #else
2334 strh ip, [r0, #0x01]
2335 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2336 #endif
2337 str r2, [r0, #0x03]
2338 strb r1, [r0, #0x07]
2339 RET
2340 LMEMCPY_8_PAD
2341
2342 /*
2343 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2344 */
2345 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2346 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2347 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2348 #ifdef __ARMEB__
2349 strh r1, [r0]
2350 mov r1, r3, lsr #16 /* r1 = ..45 */
2351 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */
2352 #else
2353 strh r2, [r0]
2354 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
2355 mov r3, r3, lsr #16 /* r3 = ..76 */
2356 #endif
2357 str r2, [r0, #0x02]
2358 strh r3, [r0, #0x06]
2359 RET
2360 LMEMCPY_8_PAD
2361
2362 /*
2363 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2364 */
/* NOTE(review): the [r1, #-1] word load reads one byte below src;
 * safe here presumably because callers guarantee the page is mapped
 * (same pattern used by case 1101 and LMEMCPY_C 0001) -- confirm. */
2365 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2366 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2367 ldrb ip, [r1, #0x07] /* ip = ...7 */
2368 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2369 strh r1, [r0]
2370 #ifdef __ARMEB__
2371 mov r1, r2, lsl #24 /* r1 = 2... */
2372 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
2373 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
2374 #else
2375 mov r1, r2, lsr #24 /* r1 = ...2 */
2376 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
2377 mov r3, r3, lsr #24 /* r3 = ...6 */
2378 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
2379 #endif
2380 str r1, [r0, #0x02]
2381 strh r3, [r0, #0x06]
2382 RET
2383 LMEMCPY_8_PAD
2384
2385 /*
2386 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2387 */
2388 ldrh r2, [r1]
2389 ldr ip, [r1, #0x02]
2390 ldrh r3, [r1, #0x06]
2391 strh r2, [r0]
2392 str ip, [r0, #0x02]
2393 strh r3, [r0, #0x06]
2394 RET
2395 LMEMCPY_8_PAD
2396
2397 /*
2398 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2399 */
2400 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
2401 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2402 ldrb ip, [r1] /* ip = ...0 */
2403 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
2404 strh r1, [r0, #0x06]
2405 #ifdef __ARMEB__
2406 mov r3, r3, lsr #24 /* r3 = ...5 */
2407 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
2408 mov r2, r2, lsr #24 /* r2 = ...1 */
2409 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
2410 #else
2411 mov r3, r3, lsl #24 /* r3 = 5... */
2412 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
2413 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
2414 #endif
2415 str r3, [r0, #0x02]
2416 strh r2, [r0]
2417 RET
2418 LMEMCPY_8_PAD
2419
2420 /*
2421 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2422 */
2423 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2424 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2425 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
2426 strh r1, [r0, #0x05]
2427 #ifdef __ARMEB__
2428 strb r3, [r0, #0x07]
2429 mov r1, r2, lsr #24 /* r1 = ...0 */
2430 strb r1, [r0]
2431 mov r2, r2, lsl #8 /* r2 = 123. */
2432 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
2433 str r2, [r0, #0x01]
2434 #else
2435 strb r2, [r0]
2436 mov r1, r3, lsr #24 /* r1 = ...7 */
2437 strb r1, [r0, #0x07]
2438 mov r2, r2, lsr #8 /* r2 = .321 */
2439 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
2440 str r2, [r0, #0x01]
2441 #endif
2442 RET
2443 LMEMCPY_8_PAD
2444
2445 /*
2446 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2447 */
2448 ldrb r3, [r1] /* r3 = ...0 */
2449 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
2450 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2451 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2452 strb r3, [r0]
2453 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
2454 #ifdef __ARMEB__
2455 strh ip, [r0, #0x05]
2456 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
2457 #else
2458 strh r3, [r0, #0x05]
2459 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
2460 #endif
2461 str r2, [r0, #0x01]
2462 strb r1, [r0, #0x07]
2463 RET
2464 LMEMCPY_8_PAD
2465
2466 /*
2467 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2468 */
2469 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2470 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2471 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2472 #ifdef __ARMEB__
2473 mov ip, r2, lsr #8 /* ip = ...0 */
2474 strb ip, [r0]
2475 mov ip, r2, lsl #24 /* ip = 1... */
2476 orr ip, ip, r3, lsr #8 /* ip = 1234 */
2477 strb r1, [r0, #0x07]
2478 mov r1, r1, lsr #8 /* r1 = ...6 */
2479 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
2480 #else
2481 strb r2, [r0]
2482 mov ip, r2, lsr #8 /* ip = ...1 */
2483 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2484 mov r2, r1, lsr #8 /* r2 = ...7 */
2485 strb r2, [r0, #0x07]
2486 mov r1, r1, lsl #8 /* r1 = .76. */
2487 orr r1, r1, r3, lsr #24 /* r1 = .765 */
2488 #endif
2489 str ip, [r0, #0x01]
2490 strh r1, [r0, #0x05]
2491 RET
2492 LMEMCPY_8_PAD
2493
2494 /*
2495 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2496 */
2497 ldrb r2, [r1]
2498 ldr ip, [r1, #0x01]
2499 ldrh r3, [r1, #0x05]
2500 ldrb r1, [r1, #0x07]
2501 strb r2, [r0]
2502 str ip, [r0, #0x01]
2503 strh r3, [r0, #0x05]
2504 strb r1, [r0, #0x07]
2505 RET
2506 LMEMCPY_8_PAD
2507
2508 /******************************************************************************
2509 * Special case for 12 byte copies
2510 */
2511 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
2512 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
2513 LMEMCPY_C_PAD
2514 .Lmemcpy_c:
/*
 * Dispatcher: build a 4-bit case index from the low two bits of the
 * source (r1) and destination (r0) pointers:
 *	index = ((dst & 3) << 2) | (src & 3)
 * Case 0000 (both 32-bit aligned) falls straight through; every other
 * case jumps to base + index * 2^LMEMCPY_C_LOG2, i.e. into one of the
 * 128-byte slots below.  The base is derived from pc (which reads as
 * the address of the current instruction + 8 on ARM), so the slot
 * layout is position-critical: each handler must end with RET and fit
 * inside its LMEMCPY_C_PAD-aligned 128-byte slot.
 *
 * In the case comments, digits 0-B name the twelve source bytes and
 * "BE:"/"LE:" show a register's contents on big-/little-endian builds.
 */
2515 and r2, r1, #0x03
2516 orr r2, r2, r0, lsl #2
2517 ands r2, r2, #0x0f
2518 sub r3, pc, #0x14
2519 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
2520
2521 /*
2522 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2523 */
2524 ldr r2, [r1]
2525 ldr r3, [r1, #0x04]
2526 ldr r1, [r1, #0x08]
2527 str r2, [r0]
2528 str r3, [r0, #0x04]
2529 str r1, [r0, #0x08]
2530 RET
2531 LMEMCPY_C_PAD
2532
2533 /*
2534 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2535 */
/* NOTE(review): the [r1, #-1] word load reads one byte below src;
 * the unwanted byte ('x' in the comments) is shifted out before the
 * store -- same pattern as the other byte-misaligned cases. */
2536 ldrb r2, [r1, #0xb] /* r2 = ...B */
2537 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2538 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2539 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2540 #ifdef __ARMEB__
2541 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
2542 str r2, [r0, #0x08]
2543 mov r2, ip, lsr #24 /* r2 = ...7 */
2544 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
2545 mov r1, r1, lsl #8 /* r1 = 012. */
2546 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
2547 #else
2548 mov r2, r2, lsl #24 /* r2 = B... */
2549 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
2550 str r2, [r0, #0x08]
2551 mov r2, ip, lsl #24 /* r2 = 7... */
2552 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
2553 mov r1, r1, lsr #8 /* r1 = .210 */
2554 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
2555 #endif
2556 str r2, [r0, #0x04]
2557 str r1, [r0]
2558 RET
2559 LMEMCPY_C_PAD
2560
2561 /*
2562 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2563 */
2564 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2565 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2566 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2567 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2568 #ifdef __ARMEB__
2569 mov r2, r2, lsl #16 /* r2 = 01.. */
2570 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2571 str r2, [r0]
2572 mov r3, r3, lsl #16 /* r3 = 45.. */
2573 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
2574 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
2575 #else
2576 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2577 str r2, [r0]
2578 mov r3, r3, lsr #16 /* r3 = ..54 */
2579 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
2580 mov r1, r1, lsl #16 /* r1 = BA.. */
2581 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
2582 #endif
2583 str r3, [r0, #0x04]
2584 str r1, [r0, #0x08]
2585 RET
2586 LMEMCPY_C_PAD
2587
2588 /*
2589 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2590 */
2591 ldrb r2, [r1] /* r2 = ...0 */
2592 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2593 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2594 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2595 #ifdef __ARMEB__
2596 mov r2, r2, lsl #24 /* r2 = 0... */
2597 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
2598 str r2, [r0]
2599 mov r3, r3, lsl #24 /* r3 = 4... */
2600 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
2601 mov r1, r1, lsr #8 /* r1 = .9AB */
2602 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
2603 #else
2604 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
2605 str r2, [r0]
2606 mov r3, r3, lsr #24 /* r3 = ...4 */
2607 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
2608 mov r1, r1, lsl #8 /* r1 = BA9. */
2609 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
2610 #endif
2611 str r3, [r0, #0x04]
2612 str r1, [r0, #0x08]
2613 RET
2614 LMEMCPY_C_PAD
2615
2616 /*
2617 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2618 */
2619 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2620 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2621 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
2622 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
2623 strh r1, [r0, #0x01]
2624 #ifdef __ARMEB__
2625 mov r1, r2, lsr #24 /* r1 = ...0 */
2626 strb r1, [r0]
2627 mov r1, r2, lsl #24 /* r1 = 3... */
2628 orr r2, r1, r3, lsr #8 /* r1 = 3456 */
2629 mov r1, r3, lsl #24 /* r1 = 7... */
2630 orr r1, r1, ip, lsr #8 /* r1 = 789A */
2631 #else
2632 strb r2, [r0]
2633 mov r1, r2, lsr #24 /* r1 = ...3 */
2634 orr r2, r1, r3, lsl #8 /* r1 = 6543 */
2635 mov r1, r3, lsr #24 /* r1 = ...7 */
2636 orr r1, r1, ip, lsl #8 /* r1 = A987 */
2637 mov ip, ip, lsr #24 /* ip = ...B */
2638 #endif
2639 str r2, [r0, #0x03]
2640 str r1, [r0, #0x07]
2641 strb ip, [r0, #0x0b]
2642 RET
2643 LMEMCPY_C_PAD
2644
2645 /*
2646 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2647 */
/* src and dst share the same misalignment, so the copy needs no
 * shifting -- just byte/halfword/word loads and stores. */
2648 ldrb r2, [r1]
2649 ldrh r3, [r1, #0x01]
2650 ldr ip, [r1, #0x03]
2651 strb r2, [r0]
2652 ldr r2, [r1, #0x07]
2653 ldrb r1, [r1, #0x0b]
2654 strh r3, [r0, #0x01]
2655 str ip, [r0, #0x03]
2656 str r2, [r0, #0x07]
2657 strb r1, [r0, #0x0b]
2658 RET
2659 LMEMCPY_C_PAD
2660
2661 /*
2662 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2663 */
2664 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2665 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2666 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2667 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2668 #ifdef __ARMEB__
2669 mov r2, r2, ror #8 /* r2 = 1..0 */
2670 strb r2, [r0]
2671 mov r2, r2, lsr #16 /* r2 = ..1. */
2672 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
2673 strh r2, [r0, #0x01]
2674 mov r2, r3, lsl #8 /* r2 = 345. */
2675 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
2676 mov r2, ip, lsl #8 /* r2 = 789. */
2677 orr r2, r2, r1, lsr #8 /* r2 = 789A */
2678 #else
2679 strb r2, [r0]
2680 mov r2, r2, lsr #8 /* r2 = ...1 */
2681 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2682 strh r2, [r0, #0x01]
2683 mov r2, r3, lsr #8 /* r2 = .543 */
2684 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
2685 mov r2, ip, lsr #8 /* r2 = .987 */
2686 orr r2, r2, r1, lsl #24 /* r2 = A987 */
2687 mov r1, r1, lsr #8 /* r1 = ...B */
2688 #endif
2689 str r3, [r0, #0x03]
2690 str r2, [r0, #0x07]
2691 strb r1, [r0, #0x0b]
2692 RET
2693 LMEMCPY_C_PAD
2694
2695 /*
2696 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2697 */
2698 ldrb r2, [r1]
2699 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2700 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2701 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2702 strb r2, [r0]
2703 #ifdef __ARMEB__
2704 mov r2, r3, lsr #16 /* r2 = ..12 */
2705 strh r2, [r0, #0x01]
2706 mov r3, r3, lsl #16 /* r3 = 34.. */
2707 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
2708 mov ip, ip, lsl #16 /* ip = 78.. */
2709 orr ip, ip, r1, lsr #16 /* ip = 789A */
2710 mov r1, r1, lsr #8 /* r1 = .9AB */
2711 #else
2712 strh r3, [r0, #0x01]
2713 mov r3, r3, lsr #16 /* r3 = ..43 */
2714 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
2715 mov ip, ip, lsr #16 /* ip = ..87 */
2716 orr ip, ip, r1, lsl #16 /* ip = A987 */
2717 mov r1, r1, lsr #16 /* r1 = ..xB */
2718 #endif
2719 str r3, [r0, #0x03]
2720 str ip, [r0, #0x07]
2721 strb r1, [r0, #0x0b]
2722 RET
2723 LMEMCPY_C_PAD
2724
2725 /*
2726 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2727 */
2728 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
2729 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2730 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
2731 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2732 #ifdef __ARMEB__
2733 strh r1, [r0]
2734 mov r1, ip, lsl #16 /* r1 = 23.. */
2735 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
2736 mov r3, r3, lsl #16 /* r3 = 67.. */
2737 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
2738 #else
2739 strh ip, [r0]
2740 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
2741 mov r3, r3, lsr #16 /* r3 = ..76 */
2742 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
2743 mov r2, r2, lsr #16 /* r2 = ..BA */
2744 #endif
2745 str r1, [r0, #0x02]
2746 str r3, [r0, #0x06]
2747 strh r2, [r0, #0x0a]
2748 RET
2749 LMEMCPY_C_PAD
2750
2751 /*
2752 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2753 */
2754 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2755 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2756 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
2757 strh ip, [r0]
2758 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2759 ldrb r1, [r1, #0x0b] /* r1 = ...B */
2760 #ifdef __ARMEB__
2761 mov r2, r2, lsl #24 /* r2 = 2... */
2762 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
2763 mov r3, r3, lsl #24 /* r3 = 6... */
2764 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
2765 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
2766 #else
2767 mov r2, r2, lsr #24 /* r2 = ...2 */
2768 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2769 mov r3, r3, lsr #24 /* r3 = ...6 */
2770 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2771 mov r1, r1, lsl #8 /* r1 = ..B. */
2772 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2773 #endif
2774 str r2, [r0, #0x02]
2775 str r3, [r0, #0x06]
2776 strh r1, [r0, #0x0a]
2777 RET
2778 LMEMCPY_C_PAD
2779
2780 /*
2781 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2782 */
2783 ldrh r2, [r1]
2784 ldr r3, [r1, #0x02]
2785 ldr ip, [r1, #0x06]
2786 ldrh r1, [r1, #0x0a]
2787 strh r2, [r0]
2788 str r3, [r0, #0x02]
2789 str ip, [r0, #0x06]
2790 strh r1, [r0, #0x0a]
2791 RET
2792 LMEMCPY_C_PAD
2793
2794 /*
2795 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2796 */
2797 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2798 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2799 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2800 strh ip, [r0, #0x0a]
2801 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2802 ldrb r1, [r1] /* r1 = ...0 */
2803 #ifdef __ARMEB__
2804 mov r2, r2, lsr #24 /* r2 = ...9 */
2805 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
2806 mov r3, r3, lsr #24 /* r3 = ...5 */
2807 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
2808 mov r1, r1, lsl #8 /* r1 = ..0. */
2809 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
2810 #else
2811 mov r2, r2, lsl #24 /* r2 = 9... */
2812 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2813 mov r3, r3, lsl #24 /* r3 = 5... */
2814 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2815 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
2816 #endif
2817 str r2, [r0, #0x06]
2818 str r3, [r0, #0x02]
2819 strh r1, [r0]
2820 RET
2821 LMEMCPY_C_PAD
2822
2823 /*
2824 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2825 */
2826 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2827 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2828 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2829 #ifdef __ARMEB__
2830 mov r3, r2, lsr #24 /* r3 = ...0 */
2831 strb r3, [r0]
2832 mov r2, r2, lsl #8 /* r2 = 123. */
2833 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
2834 str r2, [r0, #0x01]
2835 mov r2, ip, lsl #8 /* r2 = 567. */
2836 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
2837 str r2, [r0, #0x05]
2838 mov r2, r1, lsr #8 /* r2 = ..9A */
2839 strh r2, [r0, #0x09]
2840 strb r1, [r0, #0x0b]
2841 #else
2842 strb r2, [r0]
2843 mov r3, r2, lsr #8 /* r3 = .321 */
2844 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2845 str r3, [r0, #0x01]
2846 mov r3, ip, lsr #8 /* r3 = .765 */
2847 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2848 str r3, [r0, #0x05]
2849 mov r1, r1, lsr #8 /* r1 = .BA9 */
2850 strh r1, [r0, #0x09]
2851 mov r1, r1, lsr #16 /* r1 = ...B */
2852 strb r1, [r0, #0x0b]
2853 #endif
2854 RET
2855 LMEMCPY_C_PAD
2856
2857 /*
2858 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2859 */
2860 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2861 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2862 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2863 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2864 strb r2, [r0, #0x0b]
2865 #ifdef __ARMEB__
2866 strh r3, [r0, #0x09]
2867 mov r3, r3, lsr #16 /* r3 = ..78 */
2868 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
2869 mov ip, ip, lsr #16 /* ip = ..34 */
2870 orr ip, ip, r1, lsl #16 /* ip = 1234 */
2871 mov r1, r1, lsr #16 /* r1 = ..x0 */
2872 #else
2873 mov r2, r3, lsr #16 /* r2 = ..A9 */
2874 strh r2, [r0, #0x09]
2875 mov r3, r3, lsl #16 /* r3 = 87.. */
2876 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2877 mov ip, ip, lsl #16 /* ip = 43.. */
2878 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2879 mov r1, r1, lsr #8 /* r1 = .210 */
2880 #endif
2881 str r3, [r0, #0x05]
2882 str ip, [r0, #0x01]
2883 strb r1, [r0]
2884 RET
2885 LMEMCPY_C_PAD
2886
2887 /*
2888 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2889 */
/* This case keeps separate BE and LE instruction sequences (rather
 * than sharing loads/stores) because the two orderings differ too
 * much to interleave profitably. */
2890 #ifdef __ARMEB__
2891 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
2892 ldr ip, [r1, #0x06] /* ip = 6789 */
2893 ldr r3, [r1, #0x02] /* r3 = 2345 */
2894 ldrh r1, [r1] /* r1 = ..01 */
2895 strb r2, [r0, #0x0b]
2896 mov r2, r2, lsr #8 /* r2 = ...A */
2897 orr r2, r2, ip, lsl #8 /* r2 = 789A */
2898 mov ip, ip, lsr #8 /* ip = .678 */
2899 orr ip, ip, r3, lsl #24 /* ip = 5678 */
2900 mov r3, r3, lsr #8 /* r3 = .234 */
2901 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
2902 mov r1, r1, lsr #8 /* r1 = ...0 */
2903 strb r1, [r0]
2904 str r3, [r0, #0x01]
2905 str ip, [r0, #0x05]
2906 strh r2, [r0, #0x09]
2907 #else
2908 ldrh r2, [r1] /* r2 = ..10 */
2909 ldr r3, [r1, #0x02] /* r3 = 5432 */
2910 ldr ip, [r1, #0x06] /* ip = 9876 */
2911 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2912 strb r2, [r0]
2913 mov r2, r2, lsr #8 /* r2 = ...1 */
2914 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2915 mov r3, r3, lsr #24 /* r3 = ...5 */
2916 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2917 mov ip, ip, lsr #24 /* ip = ...9 */
2918 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2919 mov r1, r1, lsr #8 /* r1 = ...B */
2920 str r2, [r0, #0x01]
2921 str r3, [r0, #0x05]
2922 strh ip, [r0, #0x09]
2923 strb r1, [r0, #0x0b]
2924 #endif
2925 RET
2926 LMEMCPY_C_PAD
2927
2928 /*
2929 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2930 */
/* Same misalignment on both sides: no shifting required. */
2931 ldrb r2, [r1]
2932 ldr r3, [r1, #0x01]
2933 ldr ip, [r1, #0x05]
2934 strb r2, [r0]
2935 ldrh r2, [r1, #0x09]
2936 ldrb r1, [r1, #0x0b]
2937 str r3, [r0, #0x01]
2938 str ip, [r0, #0x05]
2939 strh r2, [r0, #0x09]
2940 strb r1, [r0, #0x0b]
2941 RET
2942 #endif /* _ARM_ARCH_5E */
2943
2944 #ifdef GPROF
2945
/*
 * Profiling marker entry points, built only under GPROF.  Each is a
 * label followed by a single nop; they generate no useful code of
 * their own.
 * NOTE(review): these look like the traditional user/btrap/etrap/
 * bintr/eintr boundary labels the kernel profiler uses to attribute
 * samples to user mode, trap handling and interrupt handling by
 * address range -- confirm against the GPROF support in the MD
 * profiling code before relying on this description.
 */
2946 ENTRY(user)
2947 nop
2948 ENTRY(btrap)
2949 nop
2950 ENTRY(etrap)
2951 nop
2952 ENTRY(bintr)
2953 nop
2954 ENTRY(eintr)
2955 nop
2956
2957 #endif
Cache object: 2565a246e52a2cf6a2fee1f062ca883c
|