FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 #include <machine/asmacros.h>
91 __FBSDID("$FreeBSD: releng/10.0/sys/arm/arm/support.S 248361 2013-03-16 02:48:49Z andrew $");
92
93 #include "assym.s"
94
/*
 * Literal pool: addresses of kernel variables consulted by bzero/memcpy
 * below.  _arm_memcpy/_arm_bzero, when non-NULL, point at an optional
 * platform-supplied block-copy/zero routine; _min_memcpy_size and
 * _min_bzero_size give the smallest request worth dispatching to it.
 */
95 .L_arm_memcpy:
96 .word _C_LABEL(_arm_memcpy)
97 .L_arm_bzero:
98 .word _C_LABEL(_arm_bzero)
99 .L_min_memcpy_size:
100 .word _C_LABEL(_min_memcpy_size)
101 .L_min_bzero_size:
102 .word _C_LABEL(_min_bzero_size)
103 /*
104 * memset: Sets a block of memory to the specified value
105 *
106 * On entry:
107 * r0 - dest address
108 * r1 - byte to write
109 * r2 - number of bytes to write
110 *
111 * On exit:
112 * r0 - dest address
113 */
114 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * bzero: r0 = dest address, r1 = length in bytes.
 * If a platform _arm_bzero hook is registered and the request meets the
 * minimum size threshold, try the hook first; on hook failure (non-zero
 * return) or otherwise, fall into the common memset body with fill
 * byte 0 (do_memset expects r0 = dest, r1 = count, r3 = fill byte).
 */
115 ENTRY(bzero)
116 ldr r3, .L_arm_bzero
117 ldr r3, [r3]
118 cmp r3, #0 /* no hook registered? */
119 beq .Lnormal0
120 ldr r2, .L_min_bzero_size
121 ldr r2, [r2]
122 cmp r1, r2 /* below hook threshold? */
123 blt .Lnormal0
124 stmfd sp!, {r0, r1, lr}
125 mov r2, #0 /* third argument to the hook */
126 mov lr, pc /* pre-ARMv5-style indirect call */
127 mov pc, r3
128 cmp r0, #0 /* hook returns 0 on success */
129 ldmfd sp!, {r0, r1, lr} /* ldm leaves the flags intact */
130 RETeq /* hook handled it */
131 .Lnormal0:
132 mov r3, #0x00 /* fill byte = 0 */
133 b do_memset
134
135 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * memset: r0 = dest, r1 = fill value (low byte used), r2 = count.
 * Register contract inside do_memset: r0 is preserved as the return
 * value, ip is the write cursor, r1 is the remaining count, r3 is the
 * fill byte (replicated to 16/32 bits below), r2 is scratch.
 * The loops rely on conditional execution after "subs" rather than
 * branches; "strged"/"stmgeia" are GE-conditional strd/stmia (old
 * pre-UAL spellings).
 */
136 ENTRY(memset)
137 and r3, r1, #0xff /* We deal with bytes */
138 mov r1, r2
139 do_memset:
140 cmp r1, #0x04 /* Do we have less than 4 bytes */
141 mov ip, r0
142 blt .Lmemset_lessthanfour
143
144 /* Ok first we will word align the address */
145 ands r2, ip, #0x03 /* Get the bottom two bits */
146 bne .Lmemset_wordunaligned /* The address is not word aligned */
147
148 /* We are now word aligned */
149 .Lmemset_wordaligned:
150 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
151 #ifdef _ARM_ARCH_5E
152 tst ip, #0x04 /* Quad-align for armv5e */
153 #else
154 cmp r1, #0x10
155 #endif
156 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
157 #ifdef _ARM_ARCH_5E
158 subne r1, r1, #0x04 /* Quad-align if necessary */
159 strne r3, [ip], #0x04
160 cmp r1, #0x10
161 #endif
162 blt .Lmemset_loop4 /* If less than 16 then use words */
163 mov r2, r3 /* Duplicate data */
164 cmp r1, #0x80 /* If < 128 then skip the big loop */
165 blt .Lmemset_loop32
166
167 /* Do 128 bytes at a time */
168 .Lmemset_loop128:
169 subs r1, r1, #0x80
170 #ifdef _ARM_ARCH_5E
171 strged r2, [ip], #0x08
172 strged r2, [ip], #0x08
173 strged r2, [ip], #0x08
174 strged r2, [ip], #0x08
175 strged r2, [ip], #0x08
176 strged r2, [ip], #0x08
177 strged r2, [ip], #0x08
178 strged r2, [ip], #0x08
179 strged r2, [ip], #0x08
180 strged r2, [ip], #0x08
181 strged r2, [ip], #0x08
182 strged r2, [ip], #0x08
183 strged r2, [ip], #0x08
184 strged r2, [ip], #0x08
185 strged r2, [ip], #0x08
186 strged r2, [ip], #0x08
187 #else
188 stmgeia ip!, {r2-r3}
189 stmgeia ip!, {r2-r3}
190 stmgeia ip!, {r2-r3}
191 stmgeia ip!, {r2-r3}
192 stmgeia ip!, {r2-r3}
193 stmgeia ip!, {r2-r3}
194 stmgeia ip!, {r2-r3}
195 stmgeia ip!, {r2-r3}
196 stmgeia ip!, {r2-r3}
197 stmgeia ip!, {r2-r3}
198 stmgeia ip!, {r2-r3}
199 stmgeia ip!, {r2-r3}
200 stmgeia ip!, {r2-r3}
201 stmgeia ip!, {r2-r3}
202 stmgeia ip!, {r2-r3}
203 stmgeia ip!, {r2-r3}
204 #endif
205 bgt .Lmemset_loop128
206 RETeq /* Zero length so just exit */
207
208 add r1, r1, #0x80 /* Adjust for extra sub */
209
210 /* Do 32 bytes at a time */
211 .Lmemset_loop32:
212 subs r1, r1, #0x20
213 #ifdef _ARM_ARCH_5E
214 strged r2, [ip], #0x08
215 strged r2, [ip], #0x08
216 strged r2, [ip], #0x08
217 strged r2, [ip], #0x08
218 #else
219 stmgeia ip!, {r2-r3}
220 stmgeia ip!, {r2-r3}
221 stmgeia ip!, {r2-r3}
222 stmgeia ip!, {r2-r3}
223 #endif
224 bgt .Lmemset_loop32
225 RETeq /* Zero length so just exit */
226
227 adds r1, r1, #0x10 /* Partially adjust for extra sub */
228
229 /* Deal with 16 bytes or more */
230 #ifdef _ARM_ARCH_5E
231 strged r2, [ip], #0x08
232 strged r2, [ip], #0x08
233 #else
234 stmgeia ip!, {r2-r3}
235 stmgeia ip!, {r2-r3}
236 #endif
237 RETeq /* Zero length so just exit */
238
239 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
240
241 /* We have at least 4 bytes so copy as words */
242 .Lmemset_loop4:
243 subs r1, r1, #0x04
244 strge r3, [ip], #0x04
245 bgt .Lmemset_loop4
246 RETeq /* Zero length so just exit */
247
248 #ifdef _ARM_ARCH_5E
249 /* Compensate for 64-bit alignment check */
250 adds r1, r1, #0x04
251 RETeq
252 cmp r1, #2
253 #else
254 cmp r1, #-2
255 #endif
256
257 strb r3, [ip], #0x01 /* Set 1 byte */
258 strgeb r3, [ip], #0x01 /* Set another byte */
259 strgtb r3, [ip] /* and a third */
260 RET /* Exit */
261
262 .Lmemset_wordunaligned:
263 rsb r2, r2, #0x004
264 strb r3, [ip], #0x01 /* Set 1 byte */
265 cmp r2, #0x02
266 strgeb r3, [ip], #0x01 /* Set another byte */
267 sub r1, r1, r2
268 strgtb r3, [ip], #0x01 /* and a third */
269 cmp r1, #0x04 /* More than 4 bytes left? */
270 bge .Lmemset_wordaligned /* Yup */
271
272 .Lmemset_lessthanfour:
273 cmp r1, #0x00
274 RETeq /* Zero length so exit */
275 strb r3, [ip], #0x01 /* Set 1 byte */
276 cmp r1, #0x02
277 strgeb r3, [ip], #0x01 /* Set another byte */
278 strgtb r3, [ip] /* and a third */
279 RET /* Exit */
280 END(bzero)
281 END(memset)
282
/*
 * bcmp: r0 = ptr1, r1 = ptr2, r2 = length.
 * Returns 0 in r0 if the buffers match; otherwise the (signed)
 * difference of the first mismatching byte pair, memcmp-style.
 * ip shadows ptr1 so r0 stays free for the return value.
 */
283 ENTRY(bcmp)
284 mov ip, r0
285 cmp r2, #0x06
286 beq .Lmemcmp_6bytes /* special-cased hot path (network stack) */
287 mov r0, #0x00
288
289 /* Are both addresses aligned the same way? */
290 cmp r2, #0x00
291 eornes r3, ip, r1
292 RETeq /* len == 0, or same addresses! */
293 tst r3, #0x03
294 subne r2, r2, #0x01
295 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
296
297 /* Word-align the addresses, if necessary */
/*
 * Computed jump: r3 = 3 * ((r1 - 5) & 3), then pc += r3 << 3, i.e.
 * skip 24 bytes (one 6-instruction compare chunk, below) per unit,
 * relying on pc reading as the address of this instruction + 8.
 */
298 sub r3, r1, #0x05
299 ands r3, r3, #0x03
300 add r3, r3, r3, lsl #1
301 addne pc, pc, r3, lsl #3
302 nop
303
304 /* Compare up to 3 bytes */
305 ldrb r0, [ip], #0x01
306 ldrb r3, [r1], #0x01
307 subs r0, r0, r3
308 RETne
309 subs r2, r2, #0x01
310 RETeq
311
312 /* Compare up to 2 bytes */
313 ldrb r0, [ip], #0x01
314 ldrb r3, [r1], #0x01
315 subs r0, r0, r3
316 RETne
317 subs r2, r2, #0x01
318 RETeq
319
320 /* Compare 1 byte */
321 ldrb r0, [ip], #0x01
322 ldrb r3, [r1], #0x01
323 subs r0, r0, r3
324 RETne
325 subs r2, r2, #0x01
326 RETeq
327
328 /* Compare 4 bytes at a time, if possible */
329 subs r2, r2, #0x04
330 bcc .Lmemcmp_bytewise
331 .Lmemcmp_word_aligned:
332 ldr r0, [ip], #0x04
333 ldr r3, [r1], #0x04
334 subs r2, r2, #0x04
335 cmpcs r0, r3
336 beq .Lmemcmp_word_aligned
337 sub r0, r0, r3
338
339 /* Correct for extra subtraction, and check if done */
340 adds r2, r2, #0x04
341 cmpeq r0, #0x00 /* If done, did all bytes match? */
342 RETeq /* Yup. Just return */
343
344 /* Re-do the final word byte-wise */
345 sub ip, ip, #0x04
346 sub r1, r1, #0x04
347
348 .Lmemcmp_bytewise:
349 add r2, r2, #0x03
350 .Lmemcmp_bytewise2:
351 ldrb r0, [ip], #0x01
352 ldrb r3, [r1], #0x01
353 subs r2, r2, #0x01
354 cmpcs r0, r3
355 beq .Lmemcmp_bytewise2
356 sub r0, r0, r3
357 RET
358
359 /*
360 * 6 byte compares are very common, thanks to the network stack.
361 * This code is hand-scheduled to reduce the number of stalls for
362 * load results. Everything else being equal, this will be ~32%
363 * faster than a byte-wise memcmp.
364 */
365 .align 5
366 .Lmemcmp_6bytes:
367 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
368 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
369 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
370 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
371 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */
372 RETne /* Return if mismatch on #0 */
373 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
374 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */
375 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */
376 RETne /* Return if mismatch on #1 */
377 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
378 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
379 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */
380 RETne /* Return if mismatch on #2 */
381 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
382 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */
383 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */
384 RETne /* Return if mismatch on #3 */
385 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
386 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
387 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */
388 RETne /* Return if mismatch on #4 */
389 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
390 RET
391 END(bcmp)
392
/*
 * bcopy(src, dst, len) has its first two arguments in the opposite
 * order from memmove(dst, src, len).  Swap r0 and r1 with the
 * classic three-XOR trick (no scratch register needed) and fall
 * straight through into memmove below.
 */
393 ENTRY(bcopy)
394 /* switch the source and destination registers */
395 eor r0, r1, r0
396 eor r1, r0, r1
397 eor r0, r1, r0
/*
 * memmove: r0 = dest, r1 = src, r2 = len; returns dest in r0.
 * Overlap-safe: if the regions do not overlap, tail-call memcpy;
 * if src < dst with overlap, copy backwards (.Lmemmove_backwards),
 * otherwise copy forwards.  The forward path below dispatches on
 * destination and source word alignment.
 */
398 ENTRY(memmove)
399 /* Do the buffers overlap? */
400 cmp r0, r1
401 RETeq /* Bail now if src/dst are the same */
402 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
403 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
404 cmp r3, r2 /* if (r3 < len) we have an overlap */
405 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
406
407 /* Determine copy direction */
408 cmp r1, r0
409 bcc .Lmemmove_backwards
410
411 moveq r0, #0 /* Quick abort for len=0 */
412 RETeq
413
414 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
415 subs r2, r2, #4
416 blt .Lmemmove_fl4 /* less than 4 bytes */
417 ands r12, r0, #3
418 bne .Lmemmove_fdestul /* oh unaligned destination addr */
419 ands r12, r1, #3
420 bne .Lmemmove_fsrcul /* oh unaligned source addr */
421
421
422 .Lmemmove_ft8:
423 /* We have aligned source and destination */
424 subs r2, r2, #8
425 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
426 subs r2, r2, #0x14
427 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
428 stmdb sp!, {r4} /* borrow r4 */
429
430 /* blat 32 bytes at a time */
431 /* XXX for really big copies perhaps we should use more registers */
432 .Lmemmove_floop32:
433 ldmia r1!, {r3, r4, r12, lr}
434 stmia r0!, {r3, r4, r12, lr}
435 ldmia r1!, {r3, r4, r12, lr}
436 stmia r0!, {r3, r4, r12, lr}
437 subs r2, r2, #0x20
438 bge .Lmemmove_floop32
439
440 cmn r2, #0x10
441 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
442 stmgeia r0!, {r3, r4, r12, lr}
443 subge r2, r2, #0x10
444 ldmia sp!, {r4} /* return r4 */
445
446 .Lmemmove_fl32:
447 adds r2, r2, #0x14
448
449 /* blat 12 bytes at a time */
450 .Lmemmove_floop12:
451 ldmgeia r1!, {r3, r12, lr}
452 stmgeia r0!, {r3, r12, lr}
453 subges r2, r2, #0x0c
454 bge .Lmemmove_floop12
455
456 .Lmemmove_fl12:
457 adds r2, r2, #8
458 blt .Lmemmove_fl4
459
460 subs r2, r2, #4
461 ldrlt r3, [r1], #4
462 strlt r3, [r0], #4
463 ldmgeia r1!, {r3, r12}
464 stmgeia r0!, {r3, r12}
465 subge r2, r2, #4
466
467 .Lmemmove_fl4:
468 /* less than 4 bytes to go */
469 adds r2, r2, #4
470 ldmeqia sp!, {r0, pc} /* done */
471
472 /* copy the crud byte at a time */
473 cmp r2, #2
474 ldrb r3, [r1], #1
475 strb r3, [r0], #1
476 ldrgeb r3, [r1], #1
477 strgeb r3, [r0], #1
478 ldrgtb r3, [r1], #1
479 strgtb r3, [r0], #1
480 ldmia sp!, {r0, pc}
481
482 /* erg - unaligned destination */
483 .Lmemmove_fdestul:
484 rsb r12, r12, #4
485 cmp r12, #2
486
487 /* align destination with byte copies */
488 ldrb r3, [r1], #1
489 strb r3, [r0], #1
490 ldrgeb r3, [r1], #1
491 strgeb r3, [r0], #1
492 ldrgtb r3, [r1], #1
493 strgtb r3, [r0], #1
494 subs r2, r2, r12
495 blt .Lmemmove_fl4 /* less the 4 bytes */
496
497 ands r12, r1, #3
498 beq .Lmemmove_ft8 /* we have an aligned source */
499
500 /* erg - unaligned source */
501 /* This is where it gets nasty ... */
502 .Lmemmove_fsrcul:
503 bic r1, r1, #3
504 ldr lr, [r1], #4
505 cmp r12, #2
506 bgt .Lmemmove_fsrcul3
507 beq .Lmemmove_fsrcul2
508 cmp r2, #0x0c
509 blt .Lmemmove_fsrcul1loop4
510 sub r2, r2, #0x0c
511 stmdb sp!, {r4, r5}
512
513 .Lmemmove_fsrcul1loop16:
514 #ifdef __ARMEB__
515 mov r3, lr, lsl #8
516 #else
517 mov r3, lr, lsr #8
518 #endif
519 ldmia r1!, {r4, r5, r12, lr}
520 #ifdef __ARMEB__
521 orr r3, r3, r4, lsr #24
522 mov r4, r4, lsl #8
523 orr r4, r4, r5, lsr #24
524 mov r5, r5, lsl #8
525 orr r5, r5, r12, lsr #24
526 mov r12, r12, lsl #8
527 orr r12, r12, lr, lsr #24
528 #else
529 orr r3, r3, r4, lsl #24
530 mov r4, r4, lsr #8
531 orr r4, r4, r5, lsl #24
532 mov r5, r5, lsr #8
533 orr r5, r5, r12, lsl #24
534 mov r12, r12, lsr #8
535 orr r12, r12, lr, lsl #24
536 #endif
537 stmia r0!, {r3-r5, r12}
538 subs r2, r2, #0x10
539 bge .Lmemmove_fsrcul1loop16
540 ldmia sp!, {r4, r5}
541 adds r2, r2, #0x0c
542 blt .Lmemmove_fsrcul1l4
543
544 .Lmemmove_fsrcul1loop4:
545 #ifdef __ARMEB__
546 mov r12, lr, lsl #8
547 #else
548 mov r12, lr, lsr #8
549 #endif
550 ldr lr, [r1], #4
551 #ifdef __ARMEB__
552 orr r12, r12, lr, lsr #24
553 #else
554 orr r12, r12, lr, lsl #24
555 #endif
556 str r12, [r0], #4
557 subs r2, r2, #4
558 bge .Lmemmove_fsrcul1loop4
559
560 .Lmemmove_fsrcul1l4:
561 sub r1, r1, #3
562 b .Lmemmove_fl4
563
564 .Lmemmove_fsrcul2:
565 cmp r2, #0x0c
566 blt .Lmemmove_fsrcul2loop4
567 sub r2, r2, #0x0c
568 stmdb sp!, {r4, r5}
569
570 .Lmemmove_fsrcul2loop16:
571 #ifdef __ARMEB__
572 mov r3, lr, lsl #16
573 #else
574 mov r3, lr, lsr #16
575 #endif
576 ldmia r1!, {r4, r5, r12, lr}
577 #ifdef __ARMEB__
578 orr r3, r3, r4, lsr #16
579 mov r4, r4, lsl #16
580 orr r4, r4, r5, lsr #16
581 mov r5, r5, lsl #16
582 orr r5, r5, r12, lsr #16
583 mov r12, r12, lsl #16
584 orr r12, r12, lr, lsr #16
585 #else
586 orr r3, r3, r4, lsl #16
587 mov r4, r4, lsr #16
588 orr r4, r4, r5, lsl #16
589 mov r5, r5, lsr #16
590 orr r5, r5, r12, lsl #16
591 mov r12, r12, lsr #16
592 orr r12, r12, lr, lsl #16
593 #endif
594 stmia r0!, {r3-r5, r12}
595 subs r2, r2, #0x10
596 bge .Lmemmove_fsrcul2loop16
597 ldmia sp!, {r4, r5}
598 adds r2, r2, #0x0c
599 blt .Lmemmove_fsrcul2l4
600
601 .Lmemmove_fsrcul2loop4:
602 #ifdef __ARMEB__
603 mov r12, lr, lsl #16
604 #else
605 mov r12, lr, lsr #16
606 #endif
607 ldr lr, [r1], #4
608 #ifdef __ARMEB__
609 orr r12, r12, lr, lsr #16
610 #else
611 orr r12, r12, lr, lsl #16
612 #endif
613 str r12, [r0], #4
614 subs r2, r2, #4
615 bge .Lmemmove_fsrcul2loop4
616
617 .Lmemmove_fsrcul2l4:
618 sub r1, r1, #2
619 b .Lmemmove_fl4
620
621 .Lmemmove_fsrcul3:
622 cmp r2, #0x0c
623 blt .Lmemmove_fsrcul3loop4
624 sub r2, r2, #0x0c
625 stmdb sp!, {r4, r5}
626
627 .Lmemmove_fsrcul3loop16:
628 #ifdef __ARMEB__
629 mov r3, lr, lsl #24
630 #else
631 mov r3, lr, lsr #24
632 #endif
633 ldmia r1!, {r4, r5, r12, lr}
634 #ifdef __ARMEB__
635 orr r3, r3, r4, lsr #8
636 mov r4, r4, lsl #24
637 orr r4, r4, r5, lsr #8
638 mov r5, r5, lsl #24
639 orr r5, r5, r12, lsr #8
640 mov r12, r12, lsl #24
641 orr r12, r12, lr, lsr #8
642 #else
643 orr r3, r3, r4, lsl #8
644 mov r4, r4, lsr #24
645 orr r4, r4, r5, lsl #8
646 mov r5, r5, lsr #24
647 orr r5, r5, r12, lsl #8
648 mov r12, r12, lsr #24
649 orr r12, r12, lr, lsl #8
650 #endif
651 stmia r0!, {r3-r5, r12}
652 subs r2, r2, #0x10
653 bge .Lmemmove_fsrcul3loop16
654 ldmia sp!, {r4, r5}
655 adds r2, r2, #0x0c
656 blt .Lmemmove_fsrcul3l4
657
658 .Lmemmove_fsrcul3loop4:
659 #ifdef __ARMEB__
660 mov r12, lr, lsl #24
661 #else
662 mov r12, lr, lsr #24
663 #endif
664 ldr lr, [r1], #4
665 #ifdef __ARMEB__
666 orr r12, r12, lr, lsr #8
667 #else
668 orr r12, r12, lr, lsl #8
669 #endif
670 str r12, [r0], #4
671 subs r2, r2, #4
672 bge .Lmemmove_fsrcul3loop4
673
674 .Lmemmove_fsrcul3l4:
675 sub r1, r1, #1
676 b .Lmemmove_fl4
677
678 .Lmemmove_backwards:
679 add r1, r1, r2
680 add r0, r0, r2
681 subs r2, r2, #4
682 blt .Lmemmove_bl4 /* less than 4 bytes */
683 ands r12, r0, #3
684 bne .Lmemmove_bdestul /* oh unaligned destination addr */
685 ands r12, r1, #3
686 bne .Lmemmove_bsrcul /* oh unaligned source addr */
687
688 .Lmemmove_bt8:
689 /* We have aligned source and destination */
690 subs r2, r2, #8
691 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
692 stmdb sp!, {r4, lr}
693 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
694 blt .Lmemmove_bl32
695
696 /* blat 32 bytes at a time */
697 /* XXX for really big copies perhaps we should use more registers */
698 .Lmemmove_bloop32:
699 ldmdb r1!, {r3, r4, r12, lr}
700 stmdb r0!, {r3, r4, r12, lr}
701 ldmdb r1!, {r3, r4, r12, lr}
702 stmdb r0!, {r3, r4, r12, lr}
703 subs r2, r2, #0x20
704 bge .Lmemmove_bloop32
705
706 .Lmemmove_bl32:
707 cmn r2, #0x10
708 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
709 stmgedb r0!, {r3, r4, r12, lr}
710 subge r2, r2, #0x10
711 adds r2, r2, #0x14
712 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
713 stmgedb r0!, {r3, r12, lr}
714 subge r2, r2, #0x0c
715 ldmia sp!, {r4, lr}
716
717 .Lmemmove_bl12:
718 adds r2, r2, #8
719 blt .Lmemmove_bl4
720 subs r2, r2, #4
721 ldrlt r3, [r1, #-4]!
722 strlt r3, [r0, #-4]!
723 ldmgedb r1!, {r3, r12}
724 stmgedb r0!, {r3, r12}
725 subge r2, r2, #4
726
727 .Lmemmove_bl4:
728 /* less than 4 bytes to go */
729 adds r2, r2, #4
730 RETeq /* done */
731
732 /* copy the crud byte at a time */
733 cmp r2, #2
734 ldrb r3, [r1, #-1]!
735 strb r3, [r0, #-1]!
736 ldrgeb r3, [r1, #-1]!
737 strgeb r3, [r0, #-1]!
738 ldrgtb r3, [r1, #-1]!
739 strgtb r3, [r0, #-1]!
740 RET
741
742 /* erg - unaligned destination */
743 .Lmemmove_bdestul:
744 cmp r12, #2
745
746 /* align destination with byte copies */
747 ldrb r3, [r1, #-1]!
748 strb r3, [r0, #-1]!
749 ldrgeb r3, [r1, #-1]!
750 strgeb r3, [r0, #-1]!
751 ldrgtb r3, [r1, #-1]!
752 strgtb r3, [r0, #-1]!
753 subs r2, r2, r12
754 blt .Lmemmove_bl4 /* less than 4 bytes to go */
755 ands r12, r1, #3
756 beq .Lmemmove_bt8 /* we have an aligned source */
757
758 /* erg - unaligned source */
759 /* This is where it gets nasty ... */
760 .Lmemmove_bsrcul:
761 bic r1, r1, #3
762 ldr r3, [r1, #0]
763 cmp r12, #2
764 blt .Lmemmove_bsrcul1
765 beq .Lmemmove_bsrcul2
766 cmp r2, #0x0c
767 blt .Lmemmove_bsrcul3loop4
768 sub r2, r2, #0x0c
769 stmdb sp!, {r4, r5, lr}
770
771 .Lmemmove_bsrcul3loop16:
772 #ifdef __ARMEB__
773 mov lr, r3, lsr #8
774 #else
775 mov lr, r3, lsl #8
776 #endif
777 ldmdb r1!, {r3-r5, r12}
778 #ifdef __ARMEB__
779 orr lr, lr, r12, lsl #24
780 mov r12, r12, lsr #8
781 orr r12, r12, r5, lsl #24
782 mov r5, r5, lsr #8
783 orr r5, r5, r4, lsl #24
784 mov r4, r4, lsr #8
785 orr r4, r4, r3, lsl #24
786 #else
787 orr lr, lr, r12, lsr #24
788 mov r12, r12, lsl #8
789 orr r12, r12, r5, lsr #24
790 mov r5, r5, lsl #8
791 orr r5, r5, r4, lsr #24
792 mov r4, r4, lsl #8
793 orr r4, r4, r3, lsr #24
794 #endif
795 stmdb r0!, {r4, r5, r12, lr}
796 subs r2, r2, #0x10
797 bge .Lmemmove_bsrcul3loop16
798 ldmia sp!, {r4, r5, lr}
799 adds r2, r2, #0x0c
800 blt .Lmemmove_bsrcul3l4
801
802 .Lmemmove_bsrcul3loop4:
803 #ifdef __ARMEB__
804 mov r12, r3, lsr #8
805 #else
806 mov r12, r3, lsl #8
807 #endif
808 ldr r3, [r1, #-4]!
809 #ifdef __ARMEB__
810 orr r12, r12, r3, lsl #24
811 #else
812 orr r12, r12, r3, lsr #24
813 #endif
814 str r12, [r0, #-4]!
815 subs r2, r2, #4
816 bge .Lmemmove_bsrcul3loop4
817
818 .Lmemmove_bsrcul3l4:
819 add r1, r1, #3
820 b .Lmemmove_bl4
821
822 .Lmemmove_bsrcul2:
823 cmp r2, #0x0c
824 blt .Lmemmove_bsrcul2loop4
825 sub r2, r2, #0x0c
826 stmdb sp!, {r4, r5, lr}
827
828 .Lmemmove_bsrcul2loop16:
829 #ifdef __ARMEB__
830 mov lr, r3, lsr #16
831 #else
832 mov lr, r3, lsl #16
833 #endif
834 ldmdb r1!, {r3-r5, r12}
835 #ifdef __ARMEB__
836 orr lr, lr, r12, lsl #16
837 mov r12, r12, lsr #16
838 orr r12, r12, r5, lsl #16
839 mov r5, r5, lsr #16
840 orr r5, r5, r4, lsl #16
841 mov r4, r4, lsr #16
842 orr r4, r4, r3, lsl #16
843 #else
844 orr lr, lr, r12, lsr #16
845 mov r12, r12, lsl #16
846 orr r12, r12, r5, lsr #16
847 mov r5, r5, lsl #16
848 orr r5, r5, r4, lsr #16
849 mov r4, r4, lsl #16
850 orr r4, r4, r3, lsr #16
851 #endif
852 stmdb r0!, {r4, r5, r12, lr}
853 subs r2, r2, #0x10
854 bge .Lmemmove_bsrcul2loop16
855 ldmia sp!, {r4, r5, lr}
856 adds r2, r2, #0x0c
857 blt .Lmemmove_bsrcul2l4
858
859 .Lmemmove_bsrcul2loop4:
860 #ifdef __ARMEB__
861 mov r12, r3, lsr #16
862 #else
863 mov r12, r3, lsl #16
864 #endif
865 ldr r3, [r1, #-4]!
866 #ifdef __ARMEB__
867 orr r12, r12, r3, lsl #16
868 #else
869 orr r12, r12, r3, lsr #16
870 #endif
871 str r12, [r0, #-4]!
872 subs r2, r2, #4
873 bge .Lmemmove_bsrcul2loop4
874
875 .Lmemmove_bsrcul2l4:
876 add r1, r1, #2
877 b .Lmemmove_bl4
878
879 .Lmemmove_bsrcul1:
880 cmp r2, #0x0c
881 blt .Lmemmove_bsrcul1loop4
882 sub r2, r2, #0x0c
883 stmdb sp!, {r4, r5, lr}
884
885 .Lmemmove_bsrcul1loop32:
886 #ifdef __ARMEB__
887 mov lr, r3, lsr #24
888 #else
889 mov lr, r3, lsl #24
890 #endif
891 ldmdb r1!, {r3-r5, r12}
892 #ifdef __ARMEB__
893 orr lr, lr, r12, lsl #8
894 mov r12, r12, lsr #24
895 orr r12, r12, r5, lsl #8
896 mov r5, r5, lsr #24
897 orr r5, r5, r4, lsl #8
898 mov r4, r4, lsr #24
899 orr r4, r4, r3, lsl #8
900 #else
901 orr lr, lr, r12, lsr #8
902 mov r12, r12, lsl #24
903 orr r12, r12, r5, lsr #8
904 mov r5, r5, lsl #24
905 orr r5, r5, r4, lsr #8
906 mov r4, r4, lsl #24
907 orr r4, r4, r3, lsr #8
908 #endif
909 stmdb r0!, {r4, r5, r12, lr}
910 subs r2, r2, #0x10
911 bge .Lmemmove_bsrcul1loop32
912 ldmia sp!, {r4, r5, lr}
913 adds r2, r2, #0x0c
914 blt .Lmemmove_bsrcul1l4
915
916 .Lmemmove_bsrcul1loop4:
917 #ifdef __ARMEB__
918 mov r12, r3, lsr #24
919 #else
920 mov r12, r3, lsl #24
921 #endif
922 ldr r3, [r1, #-4]!
923 #ifdef __ARMEB__
924 orr r12, r12, r3, lsl #8
925 #else
926 orr r12, r12, r3, lsr #8
927 #endif
928 str r12, [r0, #-4]!
929 subs r2, r2, #4
930 bge .Lmemmove_bsrcul1loop4
931
932 .Lmemmove_bsrcul1l4:
933 add r1, r1, #1
934 b .Lmemmove_bl4
935 END(bcopy)
936 END(memmove)
937
938 #if !defined(_ARM_ARCH_5E)
/*
 * memcpy (pre-ARMv5E variant): r0 = dest, r1 = src, r2 = len;
 * returns dest in r0.  Regions must not overlap.
 * Unless executing from flash (FLASHADDR window check on pc), a
 * registered _arm_memcpy hook is tried first for requests at or above
 * _min_memcpy_size; on hook failure, or for small/unhooked copies,
 * fall through to the software path, which dispatches on destination
 * and source word alignment.
 */
939 ENTRY(memcpy)
940 /* save leaf functions having to store this away */
941 /* Do not check arm_memcpy if we're running from flash */
942 #ifdef FLASHADDR
943 #if FLASHADDR > PHYSADDR
944 ldr r3, =FLASHADDR
945 cmp r3, pc
946 bls .Lnormal
947 #else
948 ldr r3, =FLASHADDR
949 cmp r3, pc
950 bhi .Lnormal
951 #endif
952 #endif
953 ldr r3, .L_arm_memcpy
954 ldr r3, [r3]
955 cmp r3, #0 /* no hook registered? */
956 beq .Lnormal
957 ldr r3, .L_min_memcpy_size
958 ldr r3, [r3]
959 cmp r2, r3 /* below hook threshold? */
960 blt .Lnormal
961 stmfd sp!, {r0-r2, r4, lr}
962 mov r3, #0 /* fourth argument to the hook */
963 ldr r4, .L_arm_memcpy
964 mov lr, pc /* pre-ARMv5-style indirect call */
965 ldr pc, [r4]
966 cmp r0, #0 /* hook returns 0 on success */
967 ldmfd sp!, {r0-r2, r4, lr} /* ldm leaves the flags intact */
968 RETeq
969
970 .Lnormal:
971 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
972
973 subs r2, r2, #4
974 blt .Lmemcpy_l4 /* less than 4 bytes */
975 ands r12, r0, #3
976 bne .Lmemcpy_destul /* oh unaligned destination addr */
977 ands r12, r1, #3
978 bne .Lmemcpy_srcul /* oh unaligned source addr */
979
980 .Lmemcpy_t8:
981 /* We have aligned source and destination */
982 subs r2, r2, #8
983 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
984 subs r2, r2, #0x14
985 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
986 stmdb sp!, {r4} /* borrow r4 */
987
988 /* blat 32 bytes at a time */
989 /* XXX for really big copies perhaps we should use more registers */
990 .Lmemcpy_loop32:
991 ldmia r1!, {r3, r4, r12, lr}
992 stmia r0!, {r3, r4, r12, lr}
993 ldmia r1!, {r3, r4, r12, lr}
994 stmia r0!, {r3, r4, r12, lr}
995 subs r2, r2, #0x20
996 bge .Lmemcpy_loop32
997
998 cmn r2, #0x10
999 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
1000 stmgeia r0!, {r3, r4, r12, lr}
1001 subge r2, r2, #0x10
1002 ldmia sp!, {r4} /* return r4 */
1003
1004 .Lmemcpy_l32:
1005 adds r2, r2, #0x14
1006
1007 /* blat 12 bytes at a time */
1008 .Lmemcpy_loop12:
1009 ldmgeia r1!, {r3, r12, lr}
1010 stmgeia r0!, {r3, r12, lr}
1011 subges r2, r2, #0x0c
1012 bge .Lmemcpy_loop12
1013
1014 .Lmemcpy_l12:
1015 adds r2, r2, #8
1016 blt .Lmemcpy_l4
1017
1018 subs r2, r2, #4
1019 ldrlt r3, [r1], #4
1020 strlt r3, [r0], #4
1021 ldmgeia r1!, {r3, r12}
1022 stmgeia r0!, {r3, r12}
1023 subge r2, r2, #4
1024
1025 .Lmemcpy_l4:
1026 /* less than 4 bytes to go */
1027 adds r2, r2, #4
1028 #ifdef __APCS_26_
1029 ldmeqia sp!, {r0, pc}^ /* done */
1030 #else
1031 ldmeqia sp!, {r0, pc} /* done */
1032 #endif
1033 /* copy the crud byte at a time */
1034 cmp r2, #2
1035 ldrb r3, [r1], #1
1036 strb r3, [r0], #1
1037 ldrgeb r3, [r1], #1
1038 strgeb r3, [r0], #1
1039 ldrgtb r3, [r1], #1
1040 strgtb r3, [r0], #1
1041 ldmia sp!, {r0, pc}
1042
1043 /* erg - unaligned destination */
1044 .Lmemcpy_destul:
1045 rsb r12, r12, #4
1046 cmp r12, #2
1047
1048 /* align destination with byte copies */
1049 ldrb r3, [r1], #1
1050 strb r3, [r0], #1
1051 ldrgeb r3, [r1], #1
1052 strgeb r3, [r0], #1
1053 ldrgtb r3, [r1], #1
1054 strgtb r3, [r0], #1
1055 subs r2, r2, r12
1056 blt .Lmemcpy_l4 /* less the 4 bytes */
1057
1058 ands r12, r1, #3
1059 beq .Lmemcpy_t8 /* we have an aligned source */
1060
1061 /* erg - unaligned source */
1062 /* This is where it gets nasty ... */
1063 .Lmemcpy_srcul:
1064 bic r1, r1, #3
1065 ldr lr, [r1], #4
1066 cmp r12, #2
1067 bgt .Lmemcpy_srcul3
1068 beq .Lmemcpy_srcul2
1069 cmp r2, #0x0c
1070 blt .Lmemcpy_srcul1loop4
1071 sub r2, r2, #0x0c
1072 stmdb sp!, {r4, r5}
1073
1074 .Lmemcpy_srcul1loop16:
1075 mov r3, lr, lsr #8
1076 ldmia r1!, {r4, r5, r12, lr}
1077 orr r3, r3, r4, lsl #24
1078 mov r4, r4, lsr #8
1079 orr r4, r4, r5, lsl #24
1080 mov r5, r5, lsr #8
1081 orr r5, r5, r12, lsl #24
1082 mov r12, r12, lsr #8
1083 orr r12, r12, lr, lsl #24
1084 stmia r0!, {r3-r5, r12}
1085 subs r2, r2, #0x10
1086 bge .Lmemcpy_srcul1loop16
1087 ldmia sp!, {r4, r5}
1088 adds r2, r2, #0x0c
1089 blt .Lmemcpy_srcul1l4
1090
1091 .Lmemcpy_srcul1loop4:
1092 mov r12, lr, lsr #8
1093 ldr lr, [r1], #4
1094 orr r12, r12, lr, lsl #24
1095 str r12, [r0], #4
1096 subs r2, r2, #4
1097 bge .Lmemcpy_srcul1loop4
1098
1099 .Lmemcpy_srcul1l4:
1100 sub r1, r1, #3
1101 b .Lmemcpy_l4
1102
1103 .Lmemcpy_srcul2:
1104 cmp r2, #0x0c
1105 blt .Lmemcpy_srcul2loop4
1106 sub r2, r2, #0x0c
1107 stmdb sp!, {r4, r5}
1108
1109 .Lmemcpy_srcul2loop16:
1110 mov r3, lr, lsr #16
1111 ldmia r1!, {r4, r5, r12, lr}
1112 orr r3, r3, r4, lsl #16
1113 mov r4, r4, lsr #16
1114 orr r4, r4, r5, lsl #16
1115 mov r5, r5, lsr #16
1116 orr r5, r5, r12, lsl #16
1117 mov r12, r12, lsr #16
1118 orr r12, r12, lr, lsl #16
1119 stmia r0!, {r3-r5, r12}
1120 subs r2, r2, #0x10
1121 bge .Lmemcpy_srcul2loop16
1122 ldmia sp!, {r4, r5}
1123 adds r2, r2, #0x0c
1124 blt .Lmemcpy_srcul2l4
1125
1126 .Lmemcpy_srcul2loop4:
1127 mov r12, lr, lsr #16
1128 ldr lr, [r1], #4
1129 orr r12, r12, lr, lsl #16
1130 str r12, [r0], #4
1131 subs r2, r2, #4
1132 bge .Lmemcpy_srcul2loop4
1133
1134 .Lmemcpy_srcul2l4:
1135 sub r1, r1, #2
1136 b .Lmemcpy_l4
1137
1138 .Lmemcpy_srcul3:
1139 cmp r2, #0x0c
1140 blt .Lmemcpy_srcul3loop4
1141 sub r2, r2, #0x0c
1142 stmdb sp!, {r4, r5}
1143
1144 .Lmemcpy_srcul3loop16:
1145 mov r3, lr, lsr #24
1146 ldmia r1!, {r4, r5, r12, lr}
1147 orr r3, r3, r4, lsl #8
1148 mov r4, r4, lsr #24
1149 orr r4, r4, r5, lsl #8
1150 mov r5, r5, lsr #24
1151 orr r5, r5, r12, lsl #8
1152 mov r12, r12, lsr #24
1153 orr r12, r12, lr, lsl #8
1154 stmia r0!, {r3-r5, r12}
1155 subs r2, r2, #0x10
1156 bge .Lmemcpy_srcul3loop16
1157 ldmia sp!, {r4, r5}
1158 adds r2, r2, #0x0c
1159 blt .Lmemcpy_srcul3l4
1160
1161 .Lmemcpy_srcul3loop4:
1162 mov r12, lr, lsr #24
1163 ldr lr, [r1], #4
1164 orr r12, r12, lr, lsl #8
1165 str r12, [r0], #4
1166 subs r2, r2, #4
1167 bge .Lmemcpy_srcul3loop4
1168
1169 .Lmemcpy_srcul3l4:
1170 sub r1, r1, #1
1171 b .Lmemcpy_l4
1172 END(memcpy)
1173
1174 #else
1175 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1176 ENTRY(memcpy)
1177 pld [r1]
1178 cmp r2, #0x0c
1179 ble .Lmemcpy_short /* <= 12 bytes */
1180 #ifdef FLASHADDR
1181 #if FLASHADDR > PHYSADDR
1182 ldr r3, =FLASHADDR
1183 cmp r3, pc
1184 bls .Lnormal
1185 #else
1186 ldr r3, =FLASHADDR
1187 cmp r3, pc
1188 bhi .Lnormal
1189 #endif
1190 #endif
1191 ldr r3, .L_arm_memcpy
1192 ldr r3, [r3]
1193 cmp r3, #0
1194 beq .Lnormal
1195 ldr r3, .L_min_memcpy_size
1196 ldr r3, [r3]
1197 cmp r2, r3
1198 blt .Lnormal
1199 stmfd sp!, {r0-r2, r4, lr}
1200 mov r3, #0
1201 ldr r4, .L_arm_memcpy
1202 mov lr, pc
1203 ldr pc, [r4]
1204 cmp r0, #0
1205 ldmfd sp!, {r0-r2, r4, lr}
1206 RETeq
1207 .Lnormal:
1208 mov r3, r0 /* We must not clobber r0 */
1209
1210 /* Word-align the destination buffer */
1211 ands ip, r3, #0x03 /* Already word aligned? */
1212 beq .Lmemcpy_wordaligned /* Yup */
1213 cmp ip, #0x02
1214 ldrb ip, [r1], #0x01
1215 sub r2, r2, #0x01
1216 strb ip, [r3], #0x01
1217 ldrleb ip, [r1], #0x01
1218 suble r2, r2, #0x01
1219 strleb ip, [r3], #0x01
1220 ldrltb ip, [r1], #0x01
1221 sublt r2, r2, #0x01
1222 strltb ip, [r3], #0x01
1223
1224 /* Destination buffer is now word aligned */
1225 .Lmemcpy_wordaligned:
1226 ands ip, r1, #0x03 /* Is src also word-aligned? */
1227 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1228
1229 /* Quad-align the destination buffer */
1230 tst r3, #0x07 /* Already quad aligned? */
1231 ldrne ip, [r1], #0x04
1232 stmfd sp!, {r4-r9} /* Free up some registers */
1233 subne r2, r2, #0x04
1234 strne ip, [r3], #0x04
1235
1236 /* Destination buffer quad aligned, source is at least word aligned */
1237 subs r2, r2, #0x80
1238 blt .Lmemcpy_w_lessthan128
1239
1240 /* Copy 128 bytes at a time */
1241 .Lmemcpy_w_loop128:
1242 ldr r4, [r1], #0x04 /* LD:00-03 */
1243 ldr r5, [r1], #0x04 /* LD:04-07 */
1244 pld [r1, #0x18] /* Prefetch 0x20 */
1245 ldr r6, [r1], #0x04 /* LD:08-0b */
1246 ldr r7, [r1], #0x04 /* LD:0c-0f */
1247 ldr r8, [r1], #0x04 /* LD:10-13 */
1248 ldr r9, [r1], #0x04 /* LD:14-17 */
1249 strd r4, [r3], #0x08 /* ST:00-07 */
1250 ldr r4, [r1], #0x04 /* LD:18-1b */
1251 ldr r5, [r1], #0x04 /* LD:1c-1f */
1252 strd r6, [r3], #0x08 /* ST:08-0f */
1253 ldr r6, [r1], #0x04 /* LD:20-23 */
1254 ldr r7, [r1], #0x04 /* LD:24-27 */
1255 pld [r1, #0x18] /* Prefetch 0x40 */
1256 strd r8, [r3], #0x08 /* ST:10-17 */
1257 ldr r8, [r1], #0x04 /* LD:28-2b */
1258 ldr r9, [r1], #0x04 /* LD:2c-2f */
1259 strd r4, [r3], #0x08 /* ST:18-1f */
1260 ldr r4, [r1], #0x04 /* LD:30-33 */
1261 ldr r5, [r1], #0x04 /* LD:34-37 */
1262 strd r6, [r3], #0x08 /* ST:20-27 */
1263 ldr r6, [r1], #0x04 /* LD:38-3b */
1264 ldr r7, [r1], #0x04 /* LD:3c-3f */
1265 strd r8, [r3], #0x08 /* ST:28-2f */
1266 ldr r8, [r1], #0x04 /* LD:40-43 */
1267 ldr r9, [r1], #0x04 /* LD:44-47 */
1268 pld [r1, #0x18] /* Prefetch 0x60 */
1269 strd r4, [r3], #0x08 /* ST:30-37 */
1270 ldr r4, [r1], #0x04 /* LD:48-4b */
1271 ldr r5, [r1], #0x04 /* LD:4c-4f */
1272 strd r6, [r3], #0x08 /* ST:38-3f */
1273 ldr r6, [r1], #0x04 /* LD:50-53 */
1274 ldr r7, [r1], #0x04 /* LD:54-57 */
1275 strd r8, [r3], #0x08 /* ST:40-47 */
1276 ldr r8, [r1], #0x04 /* LD:58-5b */
1277 ldr r9, [r1], #0x04 /* LD:5c-5f */
1278 strd r4, [r3], #0x08 /* ST:48-4f */
1279 ldr r4, [r1], #0x04 /* LD:60-63 */
1280 ldr r5, [r1], #0x04 /* LD:64-67 */
1281 pld [r1, #0x18] /* Prefetch 0x80 */
1282 strd r6, [r3], #0x08 /* ST:50-57 */
1283 ldr r6, [r1], #0x04 /* LD:68-6b */
1284 ldr r7, [r1], #0x04 /* LD:6c-6f */
1285 strd r8, [r3], #0x08 /* ST:58-5f */
1286 ldr r8, [r1], #0x04 /* LD:70-73 */
1287 ldr r9, [r1], #0x04 /* LD:74-77 */
1288 strd r4, [r3], #0x08 /* ST:60-67 */
1289 ldr r4, [r1], #0x04 /* LD:78-7b */
1290 ldr r5, [r1], #0x04 /* LD:7c-7f */
1291 strd r6, [r3], #0x08 /* ST:68-6f */
1292 strd r8, [r3], #0x08 /* ST:70-77 */
1293 subs r2, r2, #0x80
1294 strd r4, [r3], #0x08 /* ST:78-7f */
1295 bge .Lmemcpy_w_loop128
1296
1297 .Lmemcpy_w_lessthan128:
1298 adds r2, r2, #0x80 /* Adjust for extra sub */
1299 ldmeqfd sp!, {r4-r9}
1300 RETeq /* Return now if done */
1301 subs r2, r2, #0x20
1302 blt .Lmemcpy_w_lessthan32
1303
1304 /* Copy 32 bytes at a time */
1305 .Lmemcpy_w_loop32:
1306 ldr r4, [r1], #0x04
1307 ldr r5, [r1], #0x04
1308 pld [r1, #0x18]
1309 ldr r6, [r1], #0x04
1310 ldr r7, [r1], #0x04
1311 ldr r8, [r1], #0x04
1312 ldr r9, [r1], #0x04
1313 strd r4, [r3], #0x08
1314 ldr r4, [r1], #0x04
1315 ldr r5, [r1], #0x04
1316 strd r6, [r3], #0x08
1317 strd r8, [r3], #0x08
1318 subs r2, r2, #0x20
1319 strd r4, [r3], #0x08
1320 bge .Lmemcpy_w_loop32
1321
1322 .Lmemcpy_w_lessthan32:
1323 adds r2, r2, #0x20 /* Adjust for extra sub */
1324 ldmeqfd sp!, {r4-r9}
1325 RETeq /* Return now if done */
1326
1327 and r4, r2, #0x18
1328 rsbs r4, r4, #0x18
1329 addne pc, pc, r4, lsl #1
1330 nop
1331
1332 /* At least 24 bytes remaining */
1333 ldr r4, [r1], #0x04
1334 ldr r5, [r1], #0x04
1335 sub r2, r2, #0x08
1336 strd r4, [r3], #0x08
1337
1338 /* At least 16 bytes remaining */
1339 ldr r4, [r1], #0x04
1340 ldr r5, [r1], #0x04
1341 sub r2, r2, #0x08
1342 strd r4, [r3], #0x08
1343
1344 /* At least 8 bytes remaining */
1345 ldr r4, [r1], #0x04
1346 ldr r5, [r1], #0x04
1347 subs r2, r2, #0x08
1348 strd r4, [r3], #0x08
1349
1350 /* Less than 8 bytes remaining */
1351 ldmfd sp!, {r4-r9}
1352 RETeq /* Return now if done */
1353 subs r2, r2, #0x04
1354 ldrge ip, [r1], #0x04
1355 strge ip, [r3], #0x04
1356 RETeq /* Return now if done */
1357 addlt r2, r2, #0x04
1358 ldrb ip, [r1], #0x01
1359 cmp r2, #0x02
1360 ldrgeb r2, [r1], #0x01
1361 strb ip, [r3], #0x01
1362 ldrgtb ip, [r1]
1363 strgeb r2, [r3], #0x01
1364 strgtb ip, [r3]
1365 RET
1366
1367
1368 /*
1369 * At this point, it has not been possible to word align both buffers.
1370 * The destination buffer is word aligned, but the source buffer is not.
1371 */
1372 .Lmemcpy_bad_align:
1373 stmfd sp!, {r4-r7}
1374 bic r1, r1, #0x03
1375 cmp ip, #2
1376 ldr ip, [r1], #0x04
1377 bgt .Lmemcpy_bad3
1378 beq .Lmemcpy_bad2
1379 b .Lmemcpy_bad1
1380
1381 .Lmemcpy_bad1_loop16:
1382 #ifdef __ARMEB__
1383 mov r4, ip, lsl #8
1384 #else
1385 mov r4, ip, lsr #8
1386 #endif
1387 ldr r5, [r1], #0x04
1388 pld [r1, #0x018]
1389 ldr r6, [r1], #0x04
1390 ldr r7, [r1], #0x04
1391 ldr ip, [r1], #0x04
1392 #ifdef __ARMEB__
1393 orr r4, r4, r5, lsr #24
1394 mov r5, r5, lsl #8
1395 orr r5, r5, r6, lsr #24
1396 mov r6, r6, lsl #8
1397 orr r6, r6, r7, lsr #24
1398 mov r7, r7, lsl #8
1399 orr r7, r7, ip, lsr #24
1400 #else
1401 orr r4, r4, r5, lsl #24
1402 mov r5, r5, lsr #8
1403 orr r5, r5, r6, lsl #24
1404 mov r6, r6, lsr #8
1405 orr r6, r6, r7, lsl #24
1406 mov r7, r7, lsr #8
1407 orr r7, r7, ip, lsl #24
1408 #endif
1409 str r4, [r3], #0x04
1410 str r5, [r3], #0x04
1411 str r6, [r3], #0x04
1412 str r7, [r3], #0x04
1413 .Lmemcpy_bad1:
1414 subs r2, r2, #0x10
1415 bge .Lmemcpy_bad1_loop16
1416
1417 adds r2, r2, #0x10
1418 ldmeqfd sp!, {r4-r7}
1419 RETeq /* Return now if done */
1420 subs r2, r2, #0x04
1421 sublt r1, r1, #0x03
1422 blt .Lmemcpy_bad_done
1423
1424 .Lmemcpy_bad1_loop4:
1425 #ifdef __ARMEB__
1426 mov r4, ip, lsl #8
1427 #else
1428 mov r4, ip, lsr #8
1429 #endif
1430 ldr ip, [r1], #0x04
1431 subs r2, r2, #0x04
1432 #ifdef __ARMEB__
1433 orr r4, r4, ip, lsr #24
1434 #else
1435 orr r4, r4, ip, lsl #24
1436 #endif
1437 str r4, [r3], #0x04
1438 bge .Lmemcpy_bad1_loop4
1439 sub r1, r1, #0x03
1440 b .Lmemcpy_bad_done
1441
1442 .Lmemcpy_bad2_loop16:
1443 #ifdef __ARMEB__
1444 mov r4, ip, lsl #16
1445 #else
1446 mov r4, ip, lsr #16
1447 #endif
1448 ldr r5, [r1], #0x04
1449 pld [r1, #0x018]
1450 ldr r6, [r1], #0x04
1451 ldr r7, [r1], #0x04
1452 ldr ip, [r1], #0x04
1453 #ifdef __ARMEB__
1454 orr r4, r4, r5, lsr #16
1455 mov r5, r5, lsl #16
1456 orr r5, r5, r6, lsr #16
1457 mov r6, r6, lsl #16
1458 orr r6, r6, r7, lsr #16
1459 mov r7, r7, lsl #16
1460 orr r7, r7, ip, lsr #16
1461 #else
1462 orr r4, r4, r5, lsl #16
1463 mov r5, r5, lsr #16
1464 orr r5, r5, r6, lsl #16
1465 mov r6, r6, lsr #16
1466 orr r6, r6, r7, lsl #16
1467 mov r7, r7, lsr #16
1468 orr r7, r7, ip, lsl #16
1469 #endif
1470 str r4, [r3], #0x04
1471 str r5, [r3], #0x04
1472 str r6, [r3], #0x04
1473 str r7, [r3], #0x04
1474 .Lmemcpy_bad2:
1475 subs r2, r2, #0x10
1476 bge .Lmemcpy_bad2_loop16
1477
1478 adds r2, r2, #0x10
1479 ldmeqfd sp!, {r4-r7}
1480 RETeq /* Return now if done */
1481 subs r2, r2, #0x04
1482 sublt r1, r1, #0x02
1483 blt .Lmemcpy_bad_done
1484
1485 .Lmemcpy_bad2_loop4:
1486 #ifdef __ARMEB__
1487 mov r4, ip, lsl #16
1488 #else
1489 mov r4, ip, lsr #16
1490 #endif
1491 ldr ip, [r1], #0x04
1492 subs r2, r2, #0x04
1493 #ifdef __ARMEB__
1494 orr r4, r4, ip, lsr #16
1495 #else
1496 orr r4, r4, ip, lsl #16
1497 #endif
1498 str r4, [r3], #0x04
1499 bge .Lmemcpy_bad2_loop4
1500 sub r1, r1, #0x02
1501 b .Lmemcpy_bad_done
1502
1503 .Lmemcpy_bad3_loop16:
1504 #ifdef __ARMEB__
1505 mov r4, ip, lsl #24
1506 #else
1507 mov r4, ip, lsr #24
1508 #endif
1509 ldr r5, [r1], #0x04
1510 pld [r1, #0x018]
1511 ldr r6, [r1], #0x04
1512 ldr r7, [r1], #0x04
1513 ldr ip, [r1], #0x04
1514 #ifdef __ARMEB__
1515 orr r4, r4, r5, lsr #8
1516 mov r5, r5, lsl #24
1517 orr r5, r5, r6, lsr #8
1518 mov r6, r6, lsl #24
1519 orr r6, r6, r7, lsr #8
1520 mov r7, r7, lsl #24
1521 orr r7, r7, ip, lsr #8
1522 #else
1523 orr r4, r4, r5, lsl #8
1524 mov r5, r5, lsr #24
1525 orr r5, r5, r6, lsl #8
1526 mov r6, r6, lsr #24
1527 orr r6, r6, r7, lsl #8
1528 mov r7, r7, lsr #24
1529 orr r7, r7, ip, lsl #8
1530 #endif
1531 str r4, [r3], #0x04
1532 str r5, [r3], #0x04
1533 str r6, [r3], #0x04
1534 str r7, [r3], #0x04
1535 .Lmemcpy_bad3:
1536 subs r2, r2, #0x10
1537 bge .Lmemcpy_bad3_loop16
1538
1539 adds r2, r2, #0x10
1540 ldmeqfd sp!, {r4-r7}
1541 RETeq /* Return now if done */
1542 subs r2, r2, #0x04
1543 sublt r1, r1, #0x01
1544 blt .Lmemcpy_bad_done
1545
1546 .Lmemcpy_bad3_loop4:
1547 #ifdef __ARMEB__
1548 mov r4, ip, lsl #24
1549 #else
1550 mov r4, ip, lsr #24
1551 #endif
1552 ldr ip, [r1], #0x04
1553 subs r2, r2, #0x04
1554 #ifdef __ARMEB__
1555 orr r4, r4, ip, lsr #8
1556 #else
1557 orr r4, r4, ip, lsl #8
1558 #endif
1559 str r4, [r3], #0x04
1560 bge .Lmemcpy_bad3_loop4
1561 sub r1, r1, #0x01
1562
1563 .Lmemcpy_bad_done:
1564 ldmfd sp!, {r4-r7}
1565 adds r2, r2, #0x04
1566 RETeq
1567 ldrb ip, [r1], #0x01
1568 cmp r2, #0x02
1569 ldrgeb r2, [r1], #0x01
1570 strb ip, [r3], #0x01
1571 ldrgtb ip, [r1]
1572 strgeb r2, [r3], #0x01
1573 strgtb ip, [r3]
1574 RET
1575
1576
1577 /*
1578 * Handle short copies (less than 16 bytes), possibly misaligned.
1579 * Some of these are *very* common, thanks to the network stack,
1580 * and so are handled specially.
1581 */
1582 .Lmemcpy_short:
1583 add pc, pc, r2, lsl #2
1584 nop
1585 RET /* 0x00 */
1586 b .Lmemcpy_bytewise /* 0x01 */
1587 b .Lmemcpy_bytewise /* 0x02 */
1588 b .Lmemcpy_bytewise /* 0x03 */
1589 b .Lmemcpy_4 /* 0x04 */
1590 b .Lmemcpy_bytewise /* 0x05 */
1591 b .Lmemcpy_6 /* 0x06 */
1592 b .Lmemcpy_bytewise /* 0x07 */
1593 b .Lmemcpy_8 /* 0x08 */
1594 b .Lmemcpy_bytewise /* 0x09 */
1595 b .Lmemcpy_bytewise /* 0x0a */
1596 b .Lmemcpy_bytewise /* 0x0b */
1597 b .Lmemcpy_c /* 0x0c */
1598 .Lmemcpy_bytewise:
1599 mov r3, r0 /* We must not clobber r0 */
1600 ldrb ip, [r1], #0x01
1601 1: subs r2, r2, #0x01
1602 strb ip, [r3], #0x01
1603 ldrneb ip, [r1], #0x01
1604 bne 1b
1605 RET
1606
1607 /******************************************************************************
1608 * Special case for 4 byte copies
1609 */
1610 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1611 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1612 LMEMCPY_4_PAD
1613 .Lmemcpy_4:
1614 and r2, r1, #0x03
1615 orr r2, r2, r0, lsl #2
1616 ands r2, r2, #0x0f
1617 sub r3, pc, #0x14
1618 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1619
1620 /*
1621 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1622 */
1623 ldr r2, [r1]
1624 str r2, [r0]
1625 RET
1626 LMEMCPY_4_PAD
1627
1628 /*
1629 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1630 */
1631 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1632 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1633 #ifdef __ARMEB__
1634 mov r3, r3, lsl #8 /* r3 = 012. */
1635 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1636 #else
1637 mov r3, r3, lsr #8 /* r3 = .210 */
1638 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1639 #endif
1640 str r3, [r0]
1641 RET
1642 LMEMCPY_4_PAD
1643
1644 /*
1645 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1646 */
1647 #ifdef __ARMEB__
1648 ldrh r3, [r1]
1649 ldrh r2, [r1, #0x02]
1650 #else
1651 ldrh r3, [r1, #0x02]
1652 ldrh r2, [r1]
1653 #endif
1654 orr r3, r2, r3, lsl #16
1655 str r3, [r0]
1656 RET
1657 LMEMCPY_4_PAD
1658
1659 /*
1660 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1661 */
1662 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1663 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1664 #ifdef __ARMEB__
1665 mov r3, r3, lsl #24 /* r3 = 0... */
1666 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1667 #else
1668 mov r3, r3, lsr #24 /* r3 = ...0 */
1669 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1670 #endif
1671 str r3, [r0]
1672 RET
1673 LMEMCPY_4_PAD
1674
1675 /*
1676 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1677 */
1678 ldr r2, [r1]
1679 #ifdef __ARMEB__
1680 strb r2, [r0, #0x03]
1681 mov r3, r2, lsr #8
1682 mov r1, r2, lsr #24
1683 strb r1, [r0]
1684 #else
1685 strb r2, [r0]
1686 mov r3, r2, lsr #8
1687 mov r1, r2, lsr #24
1688 strb r1, [r0, #0x03]
1689 #endif
1690 strh r3, [r0, #0x01]
1691 RET
1692 LMEMCPY_4_PAD
1693
1694 /*
1695 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1696 */
1697 ldrb r2, [r1]
1698 ldrh r3, [r1, #0x01]
1699 ldrb r1, [r1, #0x03]
1700 strb r2, [r0]
1701 strh r3, [r0, #0x01]
1702 strb r1, [r0, #0x03]
1703 RET
1704 LMEMCPY_4_PAD
1705
1706 /*
1707 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1708 */
1709 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1710 	ldrh	r3, [r1, #0x02]	/* BE:r3 = ..23 LE:r3 = ..32 */
1711 #ifdef __ARMEB__
1712 mov r1, r2, lsr #8 /* r1 = ...0 */
1713 strb r1, [r0]
1714 mov r2, r2, lsl #8 /* r2 = .01. */
1715 orr r2, r2, r3, lsr #8 /* r2 = .012 */
1716 #else
1717 strb r2, [r0]
1718 mov r2, r2, lsr #8 /* r2 = ...1 */
1719 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1720 mov r3, r3, lsr #8 /* r3 = ...3 */
1721 #endif
1722 strh r2, [r0, #0x01]
1723 strb r3, [r0, #0x03]
1724 RET
1725 LMEMCPY_4_PAD
1726
1727 /*
1728 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1729 */
1730 ldrb r2, [r1]
1731 ldrh r3, [r1, #0x01]
1732 ldrb r1, [r1, #0x03]
1733 strb r2, [r0]
1734 strh r3, [r0, #0x01]
1735 strb r1, [r0, #0x03]
1736 RET
1737 LMEMCPY_4_PAD
1738
1739 /*
1740 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1741 */
1742 ldr r2, [r1]
1743 #ifdef __ARMEB__
1744 strh r2, [r0, #0x02]
1745 mov r3, r2, lsr #16
1746 strh r3, [r0]
1747 #else
1748 strh r2, [r0]
1749 mov r3, r2, lsr #16
1750 strh r3, [r0, #0x02]
1751 #endif
1752 RET
1753 LMEMCPY_4_PAD
1754
1755 /*
1756 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1757 */
1758 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1759 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1760 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1761 strh r1, [r0]
1762 #ifdef __ARMEB__
1763 mov r2, r2, lsl #8 /* r2 = 012. */
1764 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1765 #else
1766 mov r2, r2, lsr #24 /* r2 = ...2 */
1767 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1768 #endif
1769 strh r2, [r0, #0x02]
1770 RET
1771 LMEMCPY_4_PAD
1772
1773 /*
1774 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1775 */
1776 ldrh r2, [r1]
1777 ldrh r3, [r1, #0x02]
1778 strh r2, [r0]
1779 strh r3, [r0, #0x02]
1780 RET
1781 LMEMCPY_4_PAD
1782
1783 /*
1784 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1785 */
1786 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1787 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1788 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1789 strh r1, [r0, #0x02]
1790 #ifdef __ARMEB__
1791 mov r3, r3, lsr #24 /* r3 = ...1 */
1792 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1793 #else
1794 mov r3, r3, lsl #8 /* r3 = 321. */
1795 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1796 #endif
1797 strh r3, [r0]
1798 RET
1799 LMEMCPY_4_PAD
1800
1801 /*
1802 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1803 */
1804 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1805 #ifdef __ARMEB__
1806 strb r2, [r0, #0x03]
1807 mov r3, r2, lsr #8
1808 mov r1, r2, lsr #24
1809 strh r3, [r0, #0x01]
1810 strb r1, [r0]
1811 #else
1812 strb r2, [r0]
1813 mov r3, r2, lsr #8
1814 mov r1, r2, lsr #24
1815 strh r3, [r0, #0x01]
1816 strb r1, [r0, #0x03]
1817 #endif
1818 RET
1819 LMEMCPY_4_PAD
1820
1821 /*
1822 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1823 */
1824 ldrb r2, [r1]
1825 ldrh r3, [r1, #0x01]
1826 ldrb r1, [r1, #0x03]
1827 strb r2, [r0]
1828 strh r3, [r0, #0x01]
1829 strb r1, [r0, #0x03]
1830 RET
1831 LMEMCPY_4_PAD
1832
1833 /*
1834 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1835 */
1836 #ifdef __ARMEB__
1837 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1838 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1839 strb r3, [r0, #0x03]
1840 mov r3, r3, lsr #8 /* r3 = ...2 */
1841 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1842 strh r3, [r0, #0x01]
1843 mov r2, r2, lsr #8 /* r2 = ...0 */
1844 strb r2, [r0]
1845 #else
1846 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1847 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1848 strb r2, [r0]
1849 mov r2, r2, lsr #8 /* r2 = ...1 */
1850 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1851 strh r2, [r0, #0x01]
1852 mov r3, r3, lsr #8 /* r3 = ...3 */
1853 strb r3, [r0, #0x03]
1854 #endif
1855 RET
1856 LMEMCPY_4_PAD
1857
1858 /*
1859 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1860 */
1861 ldrb r2, [r1]
1862 ldrh r3, [r1, #0x01]
1863 ldrb r1, [r1, #0x03]
1864 strb r2, [r0]
1865 strh r3, [r0, #0x01]
1866 strb r1, [r0, #0x03]
1867 RET
1868 LMEMCPY_4_PAD
1869
1870
1871 /******************************************************************************
1872 * Special case for 6 byte copies
1873 */
1874 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1875 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1876 LMEMCPY_6_PAD
1877 .Lmemcpy_6:
1878 and r2, r1, #0x03
1879 orr r2, r2, r0, lsl #2
1880 ands r2, r2, #0x0f
1881 sub r3, pc, #0x14
1882 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1883
1884 /*
1885 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1886 */
1887 ldr r2, [r1]
1888 ldrh r3, [r1, #0x04]
1889 str r2, [r0]
1890 strh r3, [r0, #0x04]
1891 RET
1892 LMEMCPY_6_PAD
1893
1894 /*
1895 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1896 */
1897 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1898 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1899 #ifdef __ARMEB__
1900 mov r2, r2, lsl #8 /* r2 = 012. */
1901 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1902 #else
1903 mov r2, r2, lsr #8 /* r2 = .210 */
1904 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1905 #endif
1906 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1907 str r2, [r0]
1908 strh r3, [r0, #0x04]
1909 RET
1910 LMEMCPY_6_PAD
1911
1912 /*
1913 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1914 */
1915 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1916 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1917 #ifdef __ARMEB__
1918 mov r1, r3, lsr #16 /* r1 = ..23 */
1919 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1920 str r1, [r0]
1921 strh r3, [r0, #0x04]
1922 #else
1923 mov r1, r3, lsr #16 /* r1 = ..54 */
1924 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1925 str r2, [r0]
1926 strh r1, [r0, #0x04]
1927 #endif
1928 RET
1929 LMEMCPY_6_PAD
1930
1931 /*
1932 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1933 */
1934 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1935 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1936 	ldr	r1, [r1, #5]	/* BE:r1 = 5xxx LE:r1 = xxx5 */
1937 #ifdef __ARMEB__
1938 mov r2, r2, lsl #24 /* r2 = 0... */
1939 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1940 mov r3, r3, lsl #8 /* r3 = 234. */
1941 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
1942 #else
1943 mov r2, r2, lsr #24 /* r2 = ...0 */
1944 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1945 mov r1, r1, lsl #8 /* r1 = xx5. */
1946 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1947 #endif
1948 str r2, [r0]
1949 strh r1, [r0, #0x04]
1950 RET
1951 LMEMCPY_6_PAD
1952
1953 /*
1954 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1955 */
1956 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1957 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1958 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1959 strh r1, [r0, #0x01]
1960 #ifdef __ARMEB__
1961 mov r1, r3, lsr #24 /* r1 = ...0 */
1962 strb r1, [r0]
1963 mov r3, r3, lsl #8 /* r3 = 123. */
1964 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1965 #else
1966 strb r3, [r0]
1967 mov r3, r3, lsr #24 /* r3 = ...3 */
1968 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1969 mov r2, r2, lsr #8 /* r2 = ...5 */
1970 #endif
1971 strh r3, [r0, #0x03]
1972 strb r2, [r0, #0x05]
1973 RET
1974 LMEMCPY_6_PAD
1975
1976 /*
1977 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1978 */
1979 ldrb r2, [r1]
1980 ldrh r3, [r1, #0x01]
1981 ldrh ip, [r1, #0x03]
1982 ldrb r1, [r1, #0x05]
1983 strb r2, [r0]
1984 strh r3, [r0, #0x01]
1985 strh ip, [r0, #0x03]
1986 strb r1, [r0, #0x05]
1987 RET
1988 LMEMCPY_6_PAD
1989
1990 /*
1991 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1992 */
1993 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1994 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1995 #ifdef __ARMEB__
1996 mov r3, r2, lsr #8 /* r3 = ...0 */
1997 strb r3, [r0]
1998 strb r1, [r0, #0x05]
1999 mov r3, r1, lsr #8 /* r3 = .234 */
2000 strh r3, [r0, #0x03]
2001 mov r3, r2, lsl #8 /* r3 = .01. */
2002 orr r3, r3, r1, lsr #24 /* r3 = .012 */
2003 strh r3, [r0, #0x01]
2004 #else
2005 strb r2, [r0]
2006 mov r3, r1, lsr #24
2007 strb r3, [r0, #0x05]
2008 mov r3, r1, lsr #8 /* r3 = .543 */
2009 strh r3, [r0, #0x03]
2010 mov r3, r2, lsr #8 /* r3 = ...1 */
2011 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
2012 strh r3, [r0, #0x01]
2013 #endif
2014 RET
2015 LMEMCPY_6_PAD
2016
2017 /*
2018 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2019 */
2020 ldrb r2, [r1]
2021 ldrh r3, [r1, #0x01]
2022 ldrh ip, [r1, #0x03]
2023 ldrb r1, [r1, #0x05]
2024 strb r2, [r0]
2025 strh r3, [r0, #0x01]
2026 strh ip, [r0, #0x03]
2027 strb r1, [r0, #0x05]
2028 RET
2029 LMEMCPY_6_PAD
2030
2031 /*
2032 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2033 */
2034 #ifdef __ARMEB__
2035 ldr r2, [r1] /* r2 = 0123 */
2036 ldrh r3, [r1, #0x04] /* r3 = ..45 */
2037 mov r1, r2, lsr #16 /* r1 = ..01 */
2038 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
2039 strh r1, [r0]
2040 str r3, [r0, #0x02]
2041 #else
2042 ldrh r2, [r1, #0x04] /* r2 = ..54 */
2043 ldr r3, [r1] /* r3 = 3210 */
2044 mov r2, r2, lsl #16 /* r2 = 54.. */
2045 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
2046 strh r3, [r0]
2047 str r2, [r0, #0x02]
2048 #endif
2049 RET
2050 LMEMCPY_6_PAD
2051
2052 /*
2053 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2054 */
2055 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2056 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
2057 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2058 #ifdef __ARMEB__
2059 mov r2, r2, lsr #8 /* r2 = .345 */
2060 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
2061 #else
2062 mov r2, r2, lsl #8 /* r2 = 543. */
2063 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
2064 #endif
2065 strh r1, [r0]
2066 str r2, [r0, #0x02]
2067 RET
2068 LMEMCPY_6_PAD
2069
2070 /*
2071 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2072 */
2073 ldrh r2, [r1]
2074 ldr r3, [r1, #0x02]
2075 strh r2, [r0]
2076 str r3, [r0, #0x02]
2077 RET
2078 LMEMCPY_6_PAD
2079
2080 /*
2081 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2082 */
2083 ldrb r3, [r1] /* r3 = ...0 */
2084 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2085 ldrb r1, [r1, #0x05] /* r1 = ...5 */
2086 #ifdef __ARMEB__
2087 mov r3, r3, lsl #8 /* r3 = ..0. */
2088 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
2089 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
2090 #else
2091 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2092 mov r1, r1, lsl #24 /* r1 = 5... */
2093 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
2094 #endif
2095 strh r3, [r0]
2096 str r1, [r0, #0x02]
2097 RET
2098 LMEMCPY_6_PAD
2099
2100 /*
2101 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2102 */
2103 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2104 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
2105 #ifdef __ARMEB__
2106 mov r3, r2, lsr #24 /* r3 = ...0 */
2107 strb r3, [r0]
2108 mov r2, r2, lsl #8 /* r2 = 123. */
2109 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2110 #else
2111 strb r2, [r0]
2112 mov r2, r2, lsr #8 /* r2 = .321 */
2113 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
2114 mov r1, r1, lsr #8 /* r1 = ...5 */
2115 #endif
2116 str r2, [r0, #0x01]
2117 strb r1, [r0, #0x05]
2118 RET
2119 LMEMCPY_6_PAD
2120
2121 /*
2122 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2123 */
2124 ldrb r2, [r1]
2125 ldrh r3, [r1, #0x01]
2126 ldrh ip, [r1, #0x03]
2127 ldrb r1, [r1, #0x05]
2128 strb r2, [r0]
2129 strh r3, [r0, #0x01]
2130 strh ip, [r0, #0x03]
2131 strb r1, [r0, #0x05]
2132 RET
2133 LMEMCPY_6_PAD
2134
2135 /*
2136 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2137 */
2138 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2139 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
2140 #ifdef __ARMEB__
2141 mov r3, r2, lsr #8 /* r3 = ...0 */
2142 strb r3, [r0]
2143 mov r2, r2, lsl #24 /* r2 = 1... */
2144 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2145 #else
2146 strb r2, [r0]
2147 mov r2, r2, lsr #8 /* r2 = ...1 */
2148 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
2149 mov r1, r1, lsr #24 /* r1 = ...5 */
2150 #endif
2151 str r2, [r0, #0x01]
2152 strb r1, [r0, #0x05]
2153 RET
2154 LMEMCPY_6_PAD
2155
2156 /*
2157 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2158 */
2159 ldrb r2, [r1]
2160 ldr r3, [r1, #0x01]
2161 ldrb r1, [r1, #0x05]
2162 strb r2, [r0]
2163 str r3, [r0, #0x01]
2164 strb r1, [r0, #0x05]
2165 RET
2166 LMEMCPY_6_PAD
2167
2168
2169 /******************************************************************************
2170 * Special case for 8 byte copies
2171 */
2172 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2173 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2174 LMEMCPY_8_PAD
2175 .Lmemcpy_8:
2176 and r2, r1, #0x03
2177 orr r2, r2, r0, lsl #2
2178 ands r2, r2, #0x0f
2179 sub r3, pc, #0x14
2180 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2181
2182 /*
2183 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2184 */
2185 ldr r2, [r1]
2186 ldr r3, [r1, #0x04]
2187 str r2, [r0]
2188 str r3, [r0, #0x04]
2189 RET
2190 LMEMCPY_8_PAD
2191
2192 /*
2193 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2194 */
2195 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2196 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2197 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2198 #ifdef __ARMEB__
2199 mov r3, r3, lsl #8 /* r3 = 012. */
2200 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2201 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2202 #else
2203 mov r3, r3, lsr #8 /* r3 = .210 */
2204 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2205 mov r1, r1, lsl #24 /* r1 = 7... */
2206 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2207 #endif
2208 str r3, [r0]
2209 str r2, [r0, #0x04]
2210 RET
2211 LMEMCPY_8_PAD
2212
2213 /*
2214 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2215 */
2216 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2217 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2218 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2219 #ifdef __ARMEB__
2220 mov r2, r2, lsl #16 /* r2 = 01.. */
2221 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2222 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2223 #else
2224 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2225 mov r3, r3, lsr #16 /* r3 = ..54 */
2226 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2227 #endif
2228 str r2, [r0]
2229 str r3, [r0, #0x04]
2230 RET
2231 LMEMCPY_8_PAD
2232
2233 /*
2234 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2235 */
2236 ldrb r3, [r1] /* r3 = ...0 */
2237 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2238 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2239 #ifdef __ARMEB__
2240 mov r3, r3, lsl #24 /* r3 = 0... */
2241 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2242 mov r2, r2, lsl #24 /* r2 = 4... */
2243 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2244 #else
2245 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2246 mov r2, r2, lsr #24 /* r2 = ...4 */
2247 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2248 #endif
2249 str r3, [r0]
2250 str r2, [r0, #0x04]
2251 RET
2252 LMEMCPY_8_PAD
2253
2254 /*
2255 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2256 */
2257 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2258 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2259 #ifdef __ARMEB__
2260 mov r1, r3, lsr #24 /* r1 = ...0 */
2261 strb r1, [r0]
2262 mov r1, r3, lsr #8 /* r1 = .012 */
2263 strb r2, [r0, #0x07]
2264 mov r3, r3, lsl #24 /* r3 = 3... */
2265 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2266 #else
2267 strb r3, [r0]
2268 mov r1, r2, lsr #24 /* r1 = ...7 */
2269 strb r1, [r0, #0x07]
2270 mov r1, r3, lsr #8 /* r1 = .321 */
2271 mov r3, r3, lsr #24 /* r3 = ...3 */
2272 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2273 #endif
2274 strh r1, [r0, #0x01]
2275 str r3, [r0, #0x03]
2276 RET
2277 LMEMCPY_8_PAD
2278
2279 /*
2280 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2281 */
2282 ldrb r2, [r1]
2283 ldrh r3, [r1, #0x01]
2284 ldr ip, [r1, #0x03]
2285 ldrb r1, [r1, #0x07]
2286 strb r2, [r0]
2287 strh r3, [r0, #0x01]
2288 str ip, [r0, #0x03]
2289 strb r1, [r0, #0x07]
2290 RET
2291 LMEMCPY_8_PAD
2292
2293 /*
2294 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2295 */
2296 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2297 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2298 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2299 #ifdef __ARMEB__
2300 mov ip, r2, lsr #8 /* ip = ...0 */
2301 strb ip, [r0]
2302 mov ip, r2, lsl #8 /* ip = .01. */
2303 orr ip, ip, r3, lsr #24 /* ip = .012 */
2304 strb r1, [r0, #0x07]
2305 mov r3, r3, lsl #8 /* r3 = 345. */
2306 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2307 #else
2308 strb r2, [r0] /* 0 */
2309 mov ip, r1, lsr #8 /* ip = ...7 */
2310 strb ip, [r0, #0x07] /* 7 */
2311 mov ip, r2, lsr #8 /* ip = ...1 */
2312 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2313 mov r3, r3, lsr #8 /* r3 = .543 */
2314 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2315 #endif
2316 strh ip, [r0, #0x01]
2317 str r3, [r0, #0x03]
2318 RET
2319 LMEMCPY_8_PAD
2320
2321 /*
2322 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2323 */
2324 ldrb r3, [r1] /* r3 = ...0 */
2325 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2326 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2327 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2328 strb r3, [r0]
2329 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2330 #ifdef __ARMEB__
2331 strh r3, [r0, #0x01]
2332 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2333 #else
2334 strh ip, [r0, #0x01]
2335 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2336 #endif
2337 str r2, [r0, #0x03]
2338 strb r1, [r0, #0x07]
2339 RET
2340 LMEMCPY_8_PAD
2341
/*
 * Continuation of the .Lmemcpy_8 (8-byte copy) dispatch table.  Each
 * handler below services one (dst & 3, src & 3) alignment pair, named
 * by the 4-bit case number in its heading ((dst & 3) << 2 | (src & 3)),
 * and is padded to a fixed power-of-two size by LMEMCPY_8_PAD (defined
 * earlier in the file) so the dispatcher can reach it with a single
 * shifted add to pc.  Digits 0-7 in the comments name the eight source
 * bytes; "BE:"/"LE:" show the byte order a register holds on
 * big-/little-endian (__ARMEB__/default) builds.  r0 = dst, r1 = src.
 * Strategy throughout: do the fewest, widest loads the source
 * alignment permits, shuffle bytes in registers with the barrel
 * shifter, then do the fewest, widest stores the dest alignment
 * permits.
 */
2342 	/*
2343 	 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2344 	 */
2345 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2346 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2347 	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2348 #ifdef __ARMEB__
2349 	strh	r1, [r0]
2350 	mov	r1, r3, lsr #16		/* r1 = ..45 */
2351 	orr	r2, r1 ,r2, lsl #16	/* r2 = 2345 */
2352 #else
2353 	strh	r2, [r0]
2354 	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
2355 	mov	r3, r3, lsr #16		/* r3 = ..76 */
2356 #endif
2357 	str	r2, [r0, #0x03]
2358 	strb	r1, [r0, #0x07]
2359 	RET
2360 	LMEMCPY_8_PAD
2361 
2362 	/*
2363 	 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2364 	 */
2365 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2366 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2367 	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
2368 	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
2369 	strh	r1, [r0]
2370 #ifdef __ARMEB__
2371 	mov	r1, r2, lsl #24		/* r1 = 2... */
2372 	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
2373 	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
2374 #else
2375 	mov	r1, r2, lsr #24		/* r1 = ...2 */
2376 	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
2377 	mov	r3, r3, lsr #24		/* r3 = ...6 */
2378 	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
2379 #endif
2380 	str	r1, [r0, #0x02]
2381 	strh	r3, [r0, #0x06]
2382 	RET
2383 	LMEMCPY_8_PAD
2384 
2385 	/*
2386 	 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2387 	 */
2388 	ldrh	r2, [r1]
2389 	ldr	ip, [r1, #0x02]
2390 	ldrh	r3, [r1, #0x06]
2391 	strh	r2, [r0]
2392 	str	ip, [r0, #0x02]
2393 	strh	r3, [r0, #0x06]
2394 	RET
2395 	LMEMCPY_8_PAD
2396 
2397 	/*
2398 	 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2399 	 */
2400 	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
2401 	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2402 	ldrb	ip, [r1]		/* ip = ...0 */
2403 	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
2404 	strh	r1, [r0, #0x06]
2405 #ifdef __ARMEB__
2406 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2407 	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
2408 	mov	r2, r2, lsr #24		/* r2 = ...1 */
2409 	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
2410 #else
2411 	mov	r3, r3, lsl #24		/* r3 = 5... */
2412 	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
2413 	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
2414 #endif
2415 	str	r3, [r0, #0x02]
2416 	strh	r2, [r0]
2417 	RET
2418 	LMEMCPY_8_PAD
2419 
2420 	/*
2421 	 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2422 	 */
2423 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2424 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2425 	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
2426 	strh	r1, [r0, #0x05]
2427 #ifdef __ARMEB__
2428 	strb	r3, [r0, #0x07]
2429 	mov	r1, r2, lsr #24		/* r1 = ...0 */
2430 	strb	r1, [r0]
2431 	mov	r2, r2, lsl #8		/* r2 = 123. */
2432 	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
2433 	str	r2, [r0, #0x01]
2434 #else
2435 	strb	r2, [r0]
2436 	mov	r1, r3, lsr #24		/* r1 = ...7 */
2437 	strb	r1, [r0, #0x07]
2438 	mov	r2, r2, lsr #8		/* r2 = .321 */
2439 	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
2440 	str	r2, [r0, #0x01]
2441 #endif
2442 	RET
2443 	LMEMCPY_8_PAD
2444 
2445 	/*
2446 	 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2447 	 */
2448 	ldrb	r3, [r1]		/* r3 = ...0 */
2449 	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
2450 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2451 	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2452 	strb	r3, [r0]
2453 	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
2454 #ifdef __ARMEB__
2455 	strh	ip, [r0, #0x05]
2456 	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
2457 #else
2458 	strh	r3, [r0, #0x05]
2459 	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
2460 #endif
2461 	str	r2, [r0, #0x01]
2462 	strb	r1, [r0, #0x07]
2463 	RET
2464 	LMEMCPY_8_PAD
2465 
2466 	/*
2467 	 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2468 	 */
2469 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2470 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2471 	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2472 #ifdef __ARMEB__
2473 	mov	ip, r2, lsr #8		/* ip = ...0 */
2474 	strb	ip, [r0]
2475 	mov	ip, r2, lsl #24		/* ip = 1... */
2476 	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
2477 	strb	r1, [r0, #0x07]
2478 	mov	r1, r1, lsr #8		/* r1 = ...6 */
2479 	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
2480 #else
2481 	strb	r2, [r0]
2482 	mov	ip, r2, lsr #8		/* ip = ...1 */
2483 	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2484 	mov	r2, r1, lsr #8		/* r2 = ...7 */
2485 	strb	r2, [r0, #0x07]
2486 	mov	r1, r1, lsl #8		/* r1 = .76. */
2487 	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
2488 #endif
2489 	str	ip, [r0, #0x01]
2490 	strh	r1, [r0, #0x05]
2491 	RET
2492 	LMEMCPY_8_PAD
2493 
2494 	/*
2495 	 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2496 	 */
2497 	ldrb	r2, [r1]
2498 	ldr	ip, [r1, #0x01]
2499 	ldrh	r3, [r1, #0x05]
2500 	ldrb	r1, [r1, #0x07]
2501 	strb	r2, [r0]
2502 	str	ip, [r0, #0x01]
2503 	strh	r3, [r0, #0x05]
2504 	strb	r1, [r0, #0x07]
2505 	RET
2506 	LMEMCPY_8_PAD
2507 
2508 /******************************************************************************
2509  * Special case for 12 byte copies
2510  */
/*
 * On entry r0 = dst, r1 = src; the copy length is a constant 12 bytes.
 * The dispatcher below indexes one of 16 handlers by
 * ((dst & 3) << 2) | (src & 3); each handler is padded to
 * 1 << LMEMCPY_C_LOG2 (128) bytes by LMEMCPY_C_PAD so it can be
 * reached with a single shifted add to pc.  Digits 0-B in the
 * comments name the twelve source bytes; "BE:"/"LE:" show a
 * register's byte order on big-/little-endian (__ARMEB__/default)
 * builds.  Handlers load as wide as the source alignment allows,
 * shuffle with the barrel shifter, and store as wide as the dest
 * alignment allows.
 */
2511 #define LMEMCPY_C_LOG2	7	/* 128 bytes */
2512 #define LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
2513 	LMEMCPY_C_PAD
2514 .Lmemcpy_c:
2515 	and	r2, r1, #0x03		/* r2 = src & 3 */
2516 	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 (excess bits masked below) */
2517 	ands	r2, r2, #0x0f		/* r2 = ((dst & 3) << 2) | (src & 3); Z set for case 0 */
2518 	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c (pc reads &insn + 8; dispatch is 0x14 bytes) */
2519 	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* case != 0: jump to .Lmemcpy_c + case * 128 */
2520 
2521 	/*
2522 	 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2523 	 */
2524 	ldr	r2, [r1]
2525 	ldr	r3, [r1, #0x04]
2526 	ldr	r1, [r1, #0x08]
2527 	str	r2, [r0]
2528 	str	r3, [r0, #0x04]
2529 	str	r1, [r0, #0x08]
2530 	RET
2531 	LMEMCPY_C_PAD
2532 
2533 	/*
2534 	 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2535 	 */
2536 	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
2537 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2538 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2539 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2540 #ifdef __ARMEB__
2541 	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
2542 	str	r2, [r0, #0x08]
2543 	mov	r2, ip, lsr #24		/* r2 = ...7 */
2544 	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
2545 	mov	r1, r1, lsl #8		/* r1 = 012. */
2546 	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
2547 #else
2548 	mov	r2, r2, lsl #24		/* r2 = B... */
2549 	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
2550 	str	r2, [r0, #0x08]
2551 	mov	r2, ip, lsl #24		/* r2 = 7... */
2552 	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
2553 	mov	r1, r1, lsr #8		/* r1 = .210 */
2554 	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
2555 #endif
2556 	str	r2, [r0, #0x04]
2557 	str	r1, [r0]
2558 	RET
2559 	LMEMCPY_C_PAD
2560 
2561 	/*
2562 	 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2563 	 */
2564 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2565 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2566 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2567 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2568 #ifdef __ARMEB__
2569 	mov	r2, r2, lsl #16		/* r2 = 01.. */
2570 	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2571 	str	r2, [r0]
2572 	mov	r3, r3, lsl #16		/* r3 = 45.. */
2573 	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
2574 	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
2575 #else
2576 	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2577 	str	r2, [r0]
2578 	mov	r3, r3, lsr #16		/* r3 = ..54 */
2579 	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
2580 	mov	r1, r1, lsl #16		/* r1 = BA.. */
2581 	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
2582 #endif
2583 	str	r3, [r0, #0x04]
2584 	str	r1, [r0, #0x08]
2585 	RET
2586 	LMEMCPY_C_PAD
2587 
2588 	/*
2589 	 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2590 	 */
2591 	ldrb	r2, [r1]		/* r2 = ...0 */
2592 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2593 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2594 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2595 #ifdef __ARMEB__
2596 	mov	r2, r2, lsl #24		/* r2 = 0... */
2597 	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
2598 	str	r2, [r0]
2599 	mov	r3, r3, lsl #24		/* r3 = 4... */
2600 	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
2601 	mov	r1, r1, lsr #8		/* r1 = .9AB */
2602 	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
2603 #else
2604 	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
2605 	str	r2, [r0]
2606 	mov	r3, r3, lsr #24		/* r3 = ...4 */
2607 	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
2608 	mov	r1, r1, lsl #8		/* r1 = BA9. */
2609 	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
2610 #endif
2611 	str	r3, [r0, #0x04]
2612 	str	r1, [r0, #0x08]
2613 	RET
2614 	LMEMCPY_C_PAD
2615 
2616 	/*
2617 	 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2618 	 */
2619 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2620 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2621 	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
2622 	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
2623 	strh	r1, [r0, #0x01]
2624 #ifdef __ARMEB__
2625 	mov	r1, r2, lsr #24		/* r1 = ...0 */
2626 	strb	r1, [r0]
2627 	mov	r1, r2, lsl #24		/* r1 = 3... */
2628 	orr	r2, r1, r3, lsr #8	/* r1 = 3456 */
2629 	mov	r1, r3, lsl #24		/* r1 = 7... */
2630 	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
2631 #else
2632 	strb	r2, [r0]
2633 	mov	r1, r2, lsr #24		/* r1 = ...3 */
2634 	orr	r2, r1, r3, lsl #8	/* r1 = 6543 */
2635 	mov	r1, r3, lsr #24		/* r1 = ...7 */
2636 	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
2637 	mov	ip, ip, lsr #24		/* ip = ...B */
2638 #endif
2639 	str	r2, [r0, #0x03]
2640 	str	r1, [r0, #0x07]
2641 	strb	ip, [r0, #0x0b]
2642 	RET
2643 	LMEMCPY_C_PAD
2644 
2645 	/*
2646 	 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2647 	 */
2648 	ldrb	r2, [r1]
2649 	ldrh	r3, [r1, #0x01]
2650 	ldr	ip, [r1, #0x03]
2651 	strb	r2, [r0]
2652 	ldr	r2, [r1, #0x07]
2653 	ldrb	r1, [r1, #0x0b]
2654 	strh	r3, [r0, #0x01]
2655 	str	ip, [r0, #0x03]
2656 	str	r2, [r0, #0x07]
2657 	strb	r1, [r0, #0x0b]
2658 	RET
2659 	LMEMCPY_C_PAD
2660 
2661 	/*
2662 	 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2663 	 */
2664 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2665 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2666 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2667 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2668 #ifdef __ARMEB__
2669 	mov	r2, r2, ror #8		/* r2 = 1..0 */
2670 	strb	r2, [r0]
2671 	mov	r2, r2, lsr #16		/* r2 = ..1. */
2672 	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
2673 	strh	r2, [r0, #0x01]
2674 	mov	r2, r3, lsl #8		/* r2 = 345. */
2675 	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
2676 	mov	r2, ip, lsl #8		/* r2 = 789. */
2677 	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
2678 #else
2679 	strb	r2, [r0]
2680 	mov	r2, r2, lsr #8		/* r2 = ...1 */
2681 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2682 	strh	r2, [r0, #0x01]
2683 	mov	r2, r3, lsr #8		/* r2 = .543 */
2684 	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
2685 	mov	r2, ip, lsr #8		/* r2 = .987 */
2686 	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
2687 	mov	r1, r1, lsr #8		/* r1 = ...B */
2688 #endif
2689 	str	r3, [r0, #0x03]
2690 	str	r2, [r0, #0x07]
2691 	strb	r1, [r0, #0x0b]
2692 	RET
2693 	LMEMCPY_C_PAD
2694 
2695 	/*
2696 	 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2697 	 */
2698 	ldrb	r2, [r1]
2699 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2700 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2701 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2702 	strb	r2, [r0]
2703 #ifdef __ARMEB__
2704 	mov	r2, r3, lsr #16		/* r2 = ..12 */
2705 	strh	r2, [r0, #0x01]
2706 	mov	r3, r3, lsl #16		/* r3 = 34.. */
2707 	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
2708 	mov	ip, ip, lsl #16		/* ip = 78.. */
2709 	orr	ip, ip, r1, lsr #16	/* ip = 789A */
2710 	mov	r1, r1, lsr #8		/* r1 = .9AB */
2711 #else
2712 	strh	r3, [r0, #0x01]
2713 	mov	r3, r3, lsr #16		/* r3 = ..43 */
2714 	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
2715 	mov	ip, ip, lsr #16		/* ip = ..87 */
2716 	orr	ip, ip, r1, lsl #16	/* ip = A987 */
2717 	mov	r1, r1, lsr #16		/* r1 = ..xB */
2718 #endif
2719 	str	r3, [r0, #0x03]
2720 	str	ip, [r0, #0x07]
2721 	strb	r1, [r0, #0x0b]
2722 	RET
2723 	LMEMCPY_C_PAD
2724 
2725 	/*
2726 	 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2727 	 */
2728 	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
2729 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2730 	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
2731 	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2732 #ifdef __ARMEB__
2733 	strh	r1, [r0]
2734 	mov	r1, ip, lsl #16		/* r1 = 23.. */
2735 	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
2736 	mov	r3, r3, lsl #16		/* r3 = 67.. */
2737 	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
2738 #else
2739 	strh	ip, [r0]
2740 	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
2741 	mov	r3, r3, lsr #16		/* r3 = ..76 */
2742 	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
2743 	mov	r2, r2, lsr #16		/* r2 = ..BA */
2744 #endif
2745 	str	r1, [r0, #0x02]
2746 	str	r3, [r0, #0x06]
2747 	strh	r2, [r0, #0x0a]
2748 	RET
2749 	LMEMCPY_C_PAD
2750 
2751 	/*
2752 	 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2753 	 */
2754 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2755 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2756 	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
2757 	strh	ip, [r0]
2758 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2759 	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
2760 #ifdef __ARMEB__
2761 	mov	r2, r2, lsl #24		/* r2 = 2... */
2762 	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
2763 	mov	r3, r3, lsl #24		/* r3 = 6... */
2764 	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
2765 	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
2766 #else
2767 	mov	r2, r2, lsr #24		/* r2 = ...2 */
2768 	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
2769 	mov	r3, r3, lsr #24		/* r3 = ...6 */
2770 	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
2771 	mov	r1, r1, lsl #8		/* r1 = ..B. */
2772 	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
2773 #endif
2774 	str	r2, [r0, #0x02]
2775 	str	r3, [r0, #0x06]
2776 	strh	r1, [r0, #0x0a]
2777 	RET
2778 	LMEMCPY_C_PAD
2779 
2780 	/*
2781 	 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2782 	 */
2783 	ldrh	r2, [r1]
2784 	ldr	r3, [r1, #0x02]
2785 	ldr	ip, [r1, #0x06]
2786 	ldrh	r1, [r1, #0x0a]
2787 	strh	r2, [r0]
2788 	str	r3, [r0, #0x02]
2789 	str	ip, [r0, #0x06]
2790 	strh	r1, [r0, #0x0a]
2791 	RET
2792 	LMEMCPY_C_PAD
2793 
2794 	/*
2795 	 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2796 	 */
2797 	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
2798 	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
2799 	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
2800 	strh	ip, [r0, #0x0a]
2801 	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2802 	ldrb	r1, [r1]		/* r1 = ...0 */
2803 #ifdef __ARMEB__
2804 	mov	r2, r2, lsr #24		/* r2 = ...9 */
2805 	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
2806 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2807 	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
2808 	mov	r1, r1, lsl #8		/* r1 = ..0. */
2809 	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
2810 #else
2811 	mov	r2, r2, lsl #24		/* r2 = 9... */
2812 	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
2813 	mov	r3, r3, lsl #24		/* r3 = 5... */
2814 	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
2815 	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
2816 #endif
2817 	str	r2, [r0, #0x06]
2818 	str	r3, [r0, #0x02]
2819 	strh	r1, [r0]
2820 	RET
2821 	LMEMCPY_C_PAD
2822 
2823 	/*
2824 	 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2825 	 */
2826 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2827 	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
2828 	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
2829 #ifdef __ARMEB__
2830 	mov	r3, r2, lsr #24		/* r3 = ...0 */
2831 	strb	r3, [r0]
2832 	mov	r2, r2, lsl #8		/* r2 = 123. */
2833 	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
2834 	str	r2, [r0, #0x01]
2835 	mov	r2, ip, lsl #8		/* r2 = 567. */
2836 	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
2837 	str	r2, [r0, #0x05]
2838 	mov	r2, r1, lsr #8		/* r2 = ..9A */
2839 	strh	r2, [r0, #0x09]
2840 	strb	r1, [r0, #0x0b]
2841 #else
2842 	strb	r2, [r0]
2843 	mov	r3, r2, lsr #8		/* r3 = .321 */
2844 	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
2845 	str	r3, [r0, #0x01]
2846 	mov	r3, ip, lsr #8		/* r3 = .765 */
2847 	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
2848 	str	r3, [r0, #0x05]
2849 	mov	r1, r1, lsr #8		/* r1 = .BA9 */
2850 	strh	r1, [r0, #0x09]
2851 	mov	r1, r1, lsr #16		/* r1 = ...B */
2852 	strb	r1, [r0, #0x0b]
2853 #endif
2854 	RET
2855 	LMEMCPY_C_PAD
2856 
2857 	/*
2858 	 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2859 	 */
2860 	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
2861 	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
2862 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2863 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2864 	strb	r2, [r0, #0x0b]
2865 #ifdef __ARMEB__
2866 	strh	r3, [r0, #0x09]
2867 	mov	r3, r3, lsr #16		/* r3 = ..78 */
2868 	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
2869 	mov	ip, ip, lsr #16		/* ip = ..34 */
2870 	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
2871 	mov	r1, r1, lsr #16		/* r1 = ..x0 */
2872 #else
2873 	mov	r2, r3, lsr #16		/* r2 = ..A9 */
2874 	strh	r2, [r0, #0x09]
2875 	mov	r3, r3, lsl #16		/* r3 = 87.. */
2876 	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
2877 	mov	ip, ip, lsl #16		/* ip = 43.. */
2878 	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
2879 	mov	r1, r1, lsr #8		/* r1 = .210 */
2880 #endif
2881 	str	r3, [r0, #0x05]
2882 	str	ip, [r0, #0x01]
2883 	strb	r1, [r0]
2884 	RET
2885 	LMEMCPY_C_PAD
2886 
2887 	/*
2888 	 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2889 	 */
2890 #ifdef __ARMEB__
2891 	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
2892 	ldr	ip, [r1, #0x06]		/* ip = 6789 */
2893 	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
2894 	ldrh	r1, [r1]		/* r1 = ..01 */
2895 	strb	r2, [r0, #0x0b]
2896 	mov	r2, r2, lsr #8		/* r2 = ...A */
2897 	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
2898 	mov	ip, ip, lsr #8		/* ip = .678 */
2899 	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
2900 	mov	r3, r3, lsr #8		/* r3 = .234 */
2901 	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
2902 	mov	r1, r1, lsr #8		/* r1 = ...0 */
2903 	strb	r1, [r0]
2904 	str	r3, [r0, #0x01]
2905 	str	ip, [r0, #0x05]
2906 	strh	r2, [r0, #0x09]
2907 #else
2908 	ldrh	r2, [r1]		/* r2 = ..10 */
2909 	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
2910 	ldr	ip, [r1, #0x06]		/* ip = 9876 */
2911 	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
2912 	strb	r2, [r0]
2913 	mov	r2, r2, lsr #8		/* r2 = ...1 */
2914 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2915 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2916 	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
2917 	mov	ip, ip, lsr #24		/* ip = ...9 */
2918 	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
2919 	mov	r1, r1, lsr #8		/* r1 = ...B */
2920 	str	r2, [r0, #0x01]
2921 	str	r3, [r0, #0x05]
2922 	strh	ip, [r0, #0x09]
2923 	strb	r1, [r0, #0x0b]
2924 #endif
2925 	RET
2926 	LMEMCPY_C_PAD
2927 
2928 	/*
2929 	 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2930 	 */
2931 	ldrb	r2, [r1]
2932 	ldr	r3, [r1, #0x01]
2933 	ldr	ip, [r1, #0x05]
2934 	strb	r2, [r0]
2935 	ldrh	r2, [r1, #0x09]
2936 	ldrb	r1, [r1, #0x0b]
2937 	str	r3, [r0, #0x01]
2938 	str	ip, [r0, #0x05]
2939 	strh	r2, [r0, #0x09]
2940 	strb	r1, [r0, #0x0b]
2941 	RET
2942 END(memcpy)
2943 #endif /* _ARM_ARCH_5E */
2944
2945 #ifdef GPROF
2946 
/*
 * Profiling marker symbols, built only for GPROF kernels.  Each is a
 * single nop behind an ENTRY() label.
 * NOTE(review): these appear to be the conventional kernel-gprof
 * boundary symbols (user mode / begin-trap / end-trap / begin-intr /
 * end-intr) used to attribute sampled time outside normal functions,
 * as in other BSD ports -- confirm against the kernel profiling code
 * before relying on this description.
 */
2947 ENTRY(user)
2948 	nop
2949 ENTRY(btrap)
2950 	nop
2951 ENTRY(etrap)
2952 	nop
2953 ENTRY(bintr)
2954 	nop
2955 ENTRY(eintr)
2956 	nop
2957 
2958 #endif
Cache object: 7234edb2fa618340722f553ad481a98b
|