FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 #include <machine/asmacros.h>
91 __FBSDID("$FreeBSD: releng/9.0/sys/arm/arm/support.S 203974 2010-02-16 21:59:17Z imp $");
92
93 #include "assym.s"
94
/*
 * Indirection cells holding the addresses of tunable variables.
 * Platform code may install optimized bulk-copy/zero routines
 * (_arm_memcpy / _arm_bzero) and the minimum request size at which
 * each is worth calling; bzero/memcpy below load these cells and
 * dereference them at run time (a NULL function pointer means "no
 * hook installed, use the generic code").
 */
95 .L_arm_memcpy:
96 .word _C_LABEL(_arm_memcpy)
97 .L_arm_bzero:
98 .word _C_LABEL(_arm_bzero)
99 .L_min_memcpy_size:
100 .word _C_LABEL(_min_memcpy_size)
101 .L_min_bzero_size:
102 .word _C_LABEL(_min_bzero_size)
103 /*
104 * memset: Sets a block of memory to the specified value
105 *
106 * On entry:
107 * r0 - dest address
108 * r1 - byte to write
109 * r2 - number of bytes to write
110 *
111 * On exit:
112 * r0 - dest address
113 */
114 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * void bzero(void *dst, size_t len)
 * In:  r0 = dst, r1 = len.
 * If an _arm_bzero hook is installed and len is at least
 * _min_bzero_size, call it as (dst, len, 0); a zero return value
 * means the hook handled the request.  Otherwise (or on hook
 * failure) fall into the shared memset core with the fill byte
 * forced to zero -- do_memset expects r1 = length, r3 = fill byte.
 */
115 ENTRY(bzero)
116 ldr r3, .L_arm_bzero
117 ldr r3, [r3] /* r3 = installed hook, or NULL */
118 cmp r3, #0
119 beq .Lnormal0 /* no hook: use generic path */
120 ldr r2, .L_min_bzero_size
121 ldr r2, [r2]
122 cmp r1, r2 /* NOTE(review): signed compare (blt) -- assumes len < 2^31 */
123 blt .Lnormal0 /* below hook threshold: generic path */
124 stmfd sp!, {r0, r1, lr}
125 mov r2, #0 /* third hook argument = 0 */
126 mov lr, pc
127 mov pc, r3 /* indirect call, ARMv4-compatible */
128 cmp r0, #0 /* hook returned 0 => request handled */
129 ldmfd sp!, {r0, r1, lr}
130 RETeq
131 .Lnormal0:
132 mov r3, #0x00 /* fill byte = 0; r1 already holds len */
133 b do_memset
134
135 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * void *memset(void *dst, int c, size_t len)
 * In:  r0 = dst, r1 = c, r2 = len.  Out: r0 = dst (never modified).
 * Shared core 'do_memset' register contract: r0 = dst (kept intact
 * as the return value), r1 = remaining length, r3 = fill byte,
 * ip = write cursor, r2 = scratch / duplicated fill word.
 * bzero branches to do_memset with r3 = 0.
 */
136 ENTRY(memset)
137 and r3, r1, #0xff /* We deal with bytes */
138 mov r1, r2 /* r1 = length from here on */
139 do_memset:
140 cmp r1, #0x04 /* Do we have less than 4 bytes */
141 mov ip, r0
142 blt .Lmemset_lessthanfour
143
144 /* Ok first we will word align the address */
145 ands r2, ip, #0x03 /* Get the bottom two bits */
146 bne .Lmemset_wordunaligned /* The address is not word aligned */
147
148 /* We are now word aligned */
149 .Lmemset_wordaligned:
/* Replicate the fill byte into all four byte lanes of r3. */
150 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
151 #ifdef _ARM_ARCH_5E
152 tst ip, #0x04 /* Quad-align for armv5e */
153 #else
154 cmp r1, #0x10
155 #endif
156 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
157 #ifdef _ARM_ARCH_5E
/* On ARMv5E, 8-byte-align the cursor so strd (64-bit stores) can be used. */
158 subne r1, r1, #0x04 /* Quad-align if necessary */
159 strne r3, [ip], #0x04
160 cmp r1, #0x10
161 #endif
162 blt .Lmemset_loop4 /* If less than 16 then use words */
163 mov r2, r3 /* Duplicate data */
164 cmp r1, #0x80 /* If < 128 then skip the big loop */
165 blt .Lmemset_loop32
166
167 /* Do 128 bytes at a time */
/* subs sets ge while bytes remain; the conditional stores then write 16 x 8 bytes. */
168 .Lmemset_loop128:
169 subs r1, r1, #0x80
170 #ifdef _ARM_ARCH_5E
171 strged r2, [ip], #0x08
172 strged r2, [ip], #0x08
173 strged r2, [ip], #0x08
174 strged r2, [ip], #0x08
175 strged r2, [ip], #0x08
176 strged r2, [ip], #0x08
177 strged r2, [ip], #0x08
178 strged r2, [ip], #0x08
179 strged r2, [ip], #0x08
180 strged r2, [ip], #0x08
181 strged r2, [ip], #0x08
182 strged r2, [ip], #0x08
183 strged r2, [ip], #0x08
184 strged r2, [ip], #0x08
185 strged r2, [ip], #0x08
186 strged r2, [ip], #0x08
187 #else
188 stmgeia ip!, {r2-r3}
189 stmgeia ip!, {r2-r3}
190 stmgeia ip!, {r2-r3}
191 stmgeia ip!, {r2-r3}
192 stmgeia ip!, {r2-r3}
193 stmgeia ip!, {r2-r3}
194 stmgeia ip!, {r2-r3}
195 stmgeia ip!, {r2-r3}
196 stmgeia ip!, {r2-r3}
197 stmgeia ip!, {r2-r3}
198 stmgeia ip!, {r2-r3}
199 stmgeia ip!, {r2-r3}
200 stmgeia ip!, {r2-r3}
201 stmgeia ip!, {r2-r3}
202 stmgeia ip!, {r2-r3}
203 stmgeia ip!, {r2-r3}
204 #endif
205 bgt .Lmemset_loop128
206 RETeq /* Zero length so just exit */
207
208 add r1, r1, #0x80 /* Adjust for extra sub */
209
210 /* Do 32 bytes at a time */
211 .Lmemset_loop32:
212 subs r1, r1, #0x20
213 #ifdef _ARM_ARCH_5E
214 strged r2, [ip], #0x08
215 strged r2, [ip], #0x08
216 strged r2, [ip], #0x08
217 strged r2, [ip], #0x08
218 #else
219 stmgeia ip!, {r2-r3}
220 stmgeia ip!, {r2-r3}
221 stmgeia ip!, {r2-r3}
222 stmgeia ip!, {r2-r3}
223 #endif
224 bgt .Lmemset_loop32
225 RETeq /* Zero length so just exit */
226
227 adds r1, r1, #0x10 /* Partially adjust for extra sub */
228
229 /* Deal with 16 bytes or more */
230 #ifdef _ARM_ARCH_5E
231 strged r2, [ip], #0x08
232 strged r2, [ip], #0x08
233 #else
234 stmgeia ip!, {r2-r3}
235 stmgeia ip!, {r2-r3}
236 #endif
237 RETeq /* Zero length so just exit */
238
239 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
240
241 /* We have at least 4 bytes so copy as words */
242 .Lmemset_loop4:
243 subs r1, r1, #0x04
244 strge r3, [ip], #0x04
245 bgt .Lmemset_loop4
246 RETeq /* Zero length so just exit */
247
248 #ifdef _ARM_ARCH_5E
249 /* Compensate for 64-bit alignment check */
250 adds r1, r1, #0x04
251 RETeq
252 cmp r1, #2
253 #else
254 cmp r1, #-2
255 #endif
256
/* 1-3 trailing bytes remain; flags from the cmp select how many. */
257 strb r3, [ip], #0x01 /* Set 1 byte */
258 strgeb r3, [ip], #0x01 /* Set another byte */
259 strgtb r3, [ip] /* and a third */
260 RET /* Exit */
261
/* Destination not word aligned: r2 = low two address bits on entry. */
262 .Lmemset_wordunaligned:
263 rsb r2, r2, #0x004 /* r2 = bytes needed to reach alignment (1..3) */
264 strb r3, [ip], #0x01 /* Set 1 byte */
265 cmp r2, #0x02
266 strgeb r3, [ip], #0x01 /* Set another byte */
267 sub r1, r1, r2
268 strgtb r3, [ip], #0x01 /* and a third */
269 cmp r1, #0x04 /* More than 4 bytes left? */
270 bge .Lmemset_wordaligned /* Yup */
271
272 .Lmemset_lessthanfour:
273 cmp r1, #0x00
274 RETeq /* Zero length so exit */
275 strb r3, [ip], #0x01 /* Set 1 byte */
276 cmp r1, #0x02
277 strgeb r3, [ip], #0x01 /* Set another byte */
278 strgtb r3, [ip] /* and a third */
279 RET /* Exit */
280
/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 * In:  r0 = b1, r1 = b2, r2 = len.
 * Out: r0 = 0 if the buffers are equal; otherwise the difference of
 *      the first mismatching bytes/words (callers only test for 0).
 * ip is used as the b1 cursor so r0 can accumulate the result.
 * len == 6 is special-cased (common for MAC addresses / net stack).
 */
281 ENTRY(bcmp)
282 mov ip, r0
283 cmp r2, #0x06
284 beq .Lmemcmp_6bytes
285 mov r0, #0x00
286
287 /* Are both addresses aligned the same way? */
288 cmp r2, #0x00
289 eornes r3, ip, r1 /* r3 = b1 ^ b2; low bits zero iff same alignment */
290 RETeq /* len == 0, or same addresses! */
291 tst r3, #0x03
292 subne r2, r2, #0x01
293 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
294
295 /* Word-align the addresses, if necessary */
/*
 * Computed dispatch: r3 = ((b2 - 5) & 3) * 3 = ((align - 1) & 3) * 3,
 * then pc += r3 * 8, i.e. skip N 24-byte (6-instruction) compare
 * groups below.  align 1 falls through (needs 3 lead bytes),
 * align 2 skips one group, align 3 skips two, align 0 skips all
 * three straight to the word loop.  The nop pads so that a zero
 * offset (not taken, ands set eq) falls into the first group.
 */
296 sub r3, r1, #0x05
297 ands r3, r3, #0x03
298 add r3, r3, r3, lsl #1
299 addne pc, pc, r3, lsl #3
300 nop
301
302 /* Compare up to 3 bytes */
303 ldrb r0, [ip], #0x01
304 ldrb r3, [r1], #0x01
305 subs r0, r0, r3
306 RETne
307 subs r2, r2, #0x01
308 RETeq
309
310 /* Compare up to 2 bytes */
311 ldrb r0, [ip], #0x01
312 ldrb r3, [r1], #0x01
313 subs r0, r0, r3
314 RETne
315 subs r2, r2, #0x01
316 RETeq
317
318 /* Compare 1 byte */
319 ldrb r0, [ip], #0x01
320 ldrb r3, [r1], #0x01
321 subs r0, r0, r3
322 RETne
323 subs r2, r2, #0x01
324 RETeq
325
326 /* Compare 4 bytes at a time, if possible */
327 subs r2, r2, #0x04
328 bcc .Lmemcmp_bytewise
329 .Lmemcmp_word_aligned:
330 ldr r0, [ip], #0x04
331 ldr r3, [r1], #0x04
332 subs r2, r2, #0x04 /* cs while at least 4 bytes remained */
333 cmpcs r0, r3
334 beq .Lmemcmp_word_aligned
335 sub r0, r0, r3
336
337 /* Correct for extra subtraction, and check if done */
338 adds r2, r2, #0x04
339 cmpeq r0, #0x00 /* If done, did all bytes match? */
340 RETeq /* Yup. Just return */
341
/* Mismatch inside the last word: back up and redo it bytewise so the
 * returned difference is that of the first differing *byte*. */
342 /* Re-do the final word byte-wise */
343 sub ip, ip, #0x04
344 sub r1, r1, #0x04
345
346 .Lmemcmp_bytewise:
347 add r2, r2, #0x03
348 .Lmemcmp_bytewise2:
349 ldrb r0, [ip], #0x01
350 ldrb r3, [r1], #0x01
351 subs r2, r2, #0x01
352 cmpcs r0, r3
353 beq .Lmemcmp_bytewise2
354 sub r0, r0, r3
355 RET
356
357 /*
358 * 6 byte compares are very common, thanks to the network stack.
359 * This code is hand-scheduled to reduce the number of stalls for
360 * load results. Everything else being equal, this will be ~32%
361 * faster than a byte-wise memcmp.
362 */
363 .align 5
364 .Lmemcmp_6bytes:
365 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
366 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
367 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
368 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
369 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */
370 RETne /* Return if mismatch on #0 */
371 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
372 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */
373 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */
374 RETne /* Return if mismatch on #1 */
375 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
376 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
377 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */
378 RETne /* Return if mismatch on #2 */
379 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
380 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */
381 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */
382 RETne /* Return if mismatch on #3 */
383 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
384 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
385 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */
386 RETne /* Return if mismatch on #4 */
387 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
388 RET
389
/*
 * bcopy(src, dst, len): BSD-argument-order wrapper around memmove.
 * In: r0 = src, r1 = dst, r2 = len.
 * Exchange r0/r1 so they match memmove(dst, src, len), then fall
 * straight through into memmove below.  r3 is free scratch here:
 * memmove never reads r3 before writing it.
 */
ENTRY(bcopy)
	mov	r3, r0			/* r3 = src */
	mov	r0, r1			/* r0 = dst (memmove arg 0) */
	mov	r1, r3			/* r1 = src (memmove arg 1) */
/*
 * void *memmove(void *dst, const void *src, size_t len)
 * In:  r0 = dst, r1 = src, r2 = len.  Out: r0 = dst.
 * Overlap-safe copy: chooses a forward copy when dst < src and a
 * backward copy when dst > src, so source data is never clobbered
 * before it is read.  r3/r12/lr are scratch; r4 (and r5/lr in the
 * misaligned-source paths) are saved on the stack when borrowed.
 * bcopy above falls into this entry point after swapping r0/r1.
 */
395 ENTRY(memmove)
396 /* Do the buffers overlap? */
397 cmp r0, r1
398 RETeq /* Bail now if src/dst are the same */
/*
 * NOTE(review): this "no overlap -> tail-call memcpy" shortcut looks
 * inert.  After 'cmp r0, r1', cc means dst < src and cs means
 * dst > src, so both conditional subtractions below compute the
 * *negated* distance; the unsigned 'cmp r3, r2 / bcc' then (almost)
 * never branches to memcpy.  Behavior remains correct because the
 * code below handles both directions itself -- confirm against
 * upstream before "fixing".
 */
399 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
400 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
401 cmp r3, r2 /* if (r3 < len) we have an overlap */
402 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
403
404 /* Determine copy direction */
405 cmp r1, r0
406 bcc .Lmemmove_backwards
407
408 moveq r0, #0 /* Quick abort for len=0 */
409 RETeq
410
411 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
412 subs r2, r2, #4
413 blt .Lmemmove_fl4 /* less than 4 bytes */
414 ands r12, r0, #3
415 bne .Lmemmove_fdestul /* oh unaligned destination addr */
416 ands r12, r1, #3
417 bne .Lmemmove_fsrcul /* oh unaligned source addr */
418
419 .Lmemmove_ft8:
420 /* We have aligned source and destination */
421 subs r2, r2, #8
422 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
423 subs r2, r2, #0x14
424 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
425 stmdb sp!, {r4} /* borrow r4 */
426
427 /* blat 32 bytes at a time */
428 /* XXX for really big copies perhaps we should use more registers */
429 .Lmemmove_floop32:
430 ldmia r1!, {r3, r4, r12, lr}
431 stmia r0!, {r3, r4, r12, lr}
432 ldmia r1!, {r3, r4, r12, lr}
433 stmia r0!, {r3, r4, r12, lr}
434 subs r2, r2, #0x20
435 bge .Lmemmove_floop32
436
437 cmn r2, #0x10
438 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
439 stmgeia r0!, {r3, r4, r12, lr}
440 subge r2, r2, #0x10
441 ldmia sp!, {r4} /* return r4 */
442
443 .Lmemmove_fl32:
444 adds r2, r2, #0x14
445
446 /* blat 12 bytes at a time */
447 .Lmemmove_floop12:
448 ldmgeia r1!, {r3, r12, lr}
449 stmgeia r0!, {r3, r12, lr}
450 subges r2, r2, #0x0c
451 bge .Lmemmove_floop12
452
453 .Lmemmove_fl12:
454 adds r2, r2, #8
455 blt .Lmemmove_fl4
456
457 subs r2, r2, #4
458 ldrlt r3, [r1], #4
459 strlt r3, [r0], #4
460 ldmgeia r1!, {r3, r12}
461 stmgeia r0!, {r3, r12}
462 subge r2, r2, #4
463
464 .Lmemmove_fl4:
465 /* less than 4 bytes to go */
466 adds r2, r2, #4
467 ldmeqia sp!, {r0, pc} /* done */
468
469 /* copy the crud byte at a time */
470 cmp r2, #2
471 ldrb r3, [r1], #1
472 strb r3, [r0], #1
473 ldrgeb r3, [r1], #1
474 strgeb r3, [r0], #1
475 ldrgtb r3, [r1], #1
476 strgtb r3, [r0], #1
477 ldmia sp!, {r0, pc}
478
479 /* erg - unaligned destination */
480 .Lmemmove_fdestul:
481 rsb r12, r12, #4 /* r12 = bytes to reach word alignment (1..3) */
482 cmp r12, #2
483
484 /* align destination with byte copies */
485 ldrb r3, [r1], #1
486 strb r3, [r0], #1
487 ldrgeb r3, [r1], #1
488 strgeb r3, [r0], #1
489 ldrgtb r3, [r1], #1
490 strgtb r3, [r0], #1
491 subs r2, r2, r12
492 blt .Lmemmove_fl4 /* less the 4 bytes */
493
494 ands r12, r1, #3
495 beq .Lmemmove_ft8 /* we have an aligned source */
496
497 /* erg - unaligned source */
498 /* This is where it gets nasty ... */
/*
 * Technique: round the source down to a word boundary, pre-load one
 * aligned word into lr, then in each step read whole aligned words
 * and reassemble the misaligned data with paired lsr/lsl shifts
 * (shift amounts chosen per the 1/2/3-byte offset; __ARMEB__ swaps
 * the shift directions for big-endian byte order).  r12 holds the
 * source misalignment (1, 2 or 3) on entry.
 */
499 .Lmemmove_fsrcul:
500 bic r1, r1, #3
501 ldr lr, [r1], #4
502 cmp r12, #2
503 bgt .Lmemmove_fsrcul3
504 beq .Lmemmove_fsrcul2
505 cmp r2, #0x0c
506 blt .Lmemmove_fsrcul1loop4
507 sub r2, r2, #0x0c
508 stmdb sp!, {r4, r5}
509
510 .Lmemmove_fsrcul1loop16:
511 #ifdef __ARMEB__
512 mov r3, lr, lsl #8
513 #else
514 mov r3, lr, lsr #8
515 #endif
516 ldmia r1!, {r4, r5, r12, lr}
517 #ifdef __ARMEB__
518 orr r3, r3, r4, lsr #24
519 mov r4, r4, lsl #8
520 orr r4, r4, r5, lsr #24
521 mov r5, r5, lsl #8
522 orr r5, r5, r12, lsr #24
523 mov r12, r12, lsl #8
524 orr r12, r12, lr, lsr #24
525 #else
526 orr r3, r3, r4, lsl #24
527 mov r4, r4, lsr #8
528 orr r4, r4, r5, lsl #24
529 mov r5, r5, lsr #8
530 orr r5, r5, r12, lsl #24
531 mov r12, r12, lsr #8
532 orr r12, r12, lr, lsl #24
533 #endif
534 stmia r0!, {r3-r5, r12}
535 subs r2, r2, #0x10
536 bge .Lmemmove_fsrcul1loop16
537 ldmia sp!, {r4, r5}
538 adds r2, r2, #0x0c
539 blt .Lmemmove_fsrcul1l4
540
541 .Lmemmove_fsrcul1loop4:
542 #ifdef __ARMEB__
543 mov r12, lr, lsl #8
544 #else
545 mov r12, lr, lsr #8
546 #endif
547 ldr lr, [r1], #4
548 #ifdef __ARMEB__
549 orr r12, r12, lr, lsr #24
550 #else
551 orr r12, r12, lr, lsl #24
552 #endif
553 str r12, [r0], #4
554 subs r2, r2, #4
555 bge .Lmemmove_fsrcul1loop4
556
557 .Lmemmove_fsrcul1l4:
558 sub r1, r1, #3 /* rewind to the true (misaligned) byte position */
559 b .Lmemmove_fl4
560
561 .Lmemmove_fsrcul2:
562 cmp r2, #0x0c
563 blt .Lmemmove_fsrcul2loop4
564 sub r2, r2, #0x0c
565 stmdb sp!, {r4, r5}
566
567 .Lmemmove_fsrcul2loop16:
568 #ifdef __ARMEB__
569 mov r3, lr, lsl #16
570 #else
571 mov r3, lr, lsr #16
572 #endif
573 ldmia r1!, {r4, r5, r12, lr}
574 #ifdef __ARMEB__
575 orr r3, r3, r4, lsr #16
576 mov r4, r4, lsl #16
577 orr r4, r4, r5, lsr #16
578 mov r5, r5, lsl #16
579 orr r5, r5, r12, lsr #16
580 mov r12, r12, lsl #16
581 orr r12, r12, lr, lsr #16
582 #else
583 orr r3, r3, r4, lsl #16
584 mov r4, r4, lsr #16
585 orr r4, r4, r5, lsl #16
586 mov r5, r5, lsr #16
587 orr r5, r5, r12, lsl #16
588 mov r12, r12, lsr #16
589 orr r12, r12, lr, lsl #16
590 #endif
591 stmia r0!, {r3-r5, r12}
592 subs r2, r2, #0x10
593 bge .Lmemmove_fsrcul2loop16
594 ldmia sp!, {r4, r5}
595 adds r2, r2, #0x0c
596 blt .Lmemmove_fsrcul2l4
597
598 .Lmemmove_fsrcul2loop4:
599 #ifdef __ARMEB__
600 mov r12, lr, lsl #16
601 #else
602 mov r12, lr, lsr #16
603 #endif
604 ldr lr, [r1], #4
605 #ifdef __ARMEB__
606 orr r12, r12, lr, lsr #16
607 #else
608 orr r12, r12, lr, lsl #16
609 #endif
610 str r12, [r0], #4
611 subs r2, r2, #4
612 bge .Lmemmove_fsrcul2loop4
613
614 .Lmemmove_fsrcul2l4:
615 sub r1, r1, #2 /* rewind to the true (misaligned) byte position */
616 b .Lmemmove_fl4
617
618 .Lmemmove_fsrcul3:
619 cmp r2, #0x0c
620 blt .Lmemmove_fsrcul3loop4
621 sub r2, r2, #0x0c
622 stmdb sp!, {r4, r5}
623
624 .Lmemmove_fsrcul3loop16:
625 #ifdef __ARMEB__
626 mov r3, lr, lsl #24
627 #else
628 mov r3, lr, lsr #24
629 #endif
630 ldmia r1!, {r4, r5, r12, lr}
631 #ifdef __ARMEB__
632 orr r3, r3, r4, lsr #8
633 mov r4, r4, lsl #24
634 orr r4, r4, r5, lsr #8
635 mov r5, r5, lsl #24
636 orr r5, r5, r12, lsr #8
637 mov r12, r12, lsl #24
638 orr r12, r12, lr, lsr #8
639 #else
640 orr r3, r3, r4, lsl #8
641 mov r4, r4, lsr #24
642 orr r4, r4, r5, lsl #8
643 mov r5, r5, lsr #24
644 orr r5, r5, r12, lsl #8
645 mov r12, r12, lsr #24
646 orr r12, r12, lr, lsl #8
647 #endif
648 stmia r0!, {r3-r5, r12}
649 subs r2, r2, #0x10
650 bge .Lmemmove_fsrcul3loop16
651 ldmia sp!, {r4, r5}
652 adds r2, r2, #0x0c
653 blt .Lmemmove_fsrcul3l4
654
655 .Lmemmove_fsrcul3loop4:
656 #ifdef __ARMEB__
657 mov r12, lr, lsl #24
658 #else
659 mov r12, lr, lsr #24
660 #endif
661 ldr lr, [r1], #4
662 #ifdef __ARMEB__
663 orr r12, r12, lr, lsr #8
664 #else
665 orr r12, r12, lr, lsl #8
666 #endif
667 str r12, [r0], #4
668 subs r2, r2, #4
669 bge .Lmemmove_fsrcul3loop4
670
671 .Lmemmove_fsrcul3l4:
672 sub r1, r1, #1 /* rewind to the true (misaligned) byte position */
673 b .Lmemmove_fl4
674
/* Backward copy: start from the ends of both buffers and work down
 * (used when dst > src so an overlapping tail is read before it is
 * overwritten).  Mirror image of the forward path above. */
675 .Lmemmove_backwards:
676 add r1, r1, r2
677 add r0, r0, r2
678 subs r2, r2, #4
679 blt .Lmemmove_bl4 /* less than 4 bytes */
680 ands r12, r0, #3
681 bne .Lmemmove_bdestul /* oh unaligned destination addr */
682 ands r12, r1, #3
683 bne .Lmemmove_bsrcul /* oh unaligned source addr */
684
685 .Lmemmove_bt8:
686 /* We have aligned source and destination */
687 subs r2, r2, #8
688 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
689 stmdb sp!, {r4, lr}
690 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
691 blt .Lmemmove_bl32
692
693 /* blat 32 bytes at a time */
694 /* XXX for really big copies perhaps we should use more registers */
695 .Lmemmove_bloop32:
696 ldmdb r1!, {r3, r4, r12, lr}
697 stmdb r0!, {r3, r4, r12, lr}
698 ldmdb r1!, {r3, r4, r12, lr}
699 stmdb r0!, {r3, r4, r12, lr}
700 subs r2, r2, #0x20
701 bge .Lmemmove_bloop32
702
703 .Lmemmove_bl32:
704 cmn r2, #0x10
705 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
706 stmgedb r0!, {r3, r4, r12, lr}
707 subge r2, r2, #0x10
708 adds r2, r2, #0x14
709 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
710 stmgedb r0!, {r3, r12, lr}
711 subge r2, r2, #0x0c
712 ldmia sp!, {r4, lr}
713
714 .Lmemmove_bl12:
715 adds r2, r2, #8
716 blt .Lmemmove_bl4
717 subs r2, r2, #4
718 ldrlt r3, [r1, #-4]!
719 strlt r3, [r0, #-4]!
720 ldmgedb r1!, {r3, r12}
721 stmgedb r0!, {r3, r12}
722 subge r2, r2, #4
723
724 .Lmemmove_bl4:
725 /* less than 4 bytes to go */
726 adds r2, r2, #4
727 RETeq /* done */
728
729 /* copy the crud byte at a time */
730 cmp r2, #2
731 ldrb r3, [r1, #-1]!
732 strb r3, [r0, #-1]!
733 ldrgeb r3, [r1, #-1]!
734 strgeb r3, [r0, #-1]!
735 ldrgtb r3, [r1, #-1]!
736 strgtb r3, [r0, #-1]!
737 RET
738
739 /* erg - unaligned destination */
740 .Lmemmove_bdestul:
741 cmp r12, #2 /* r12 = dst misalignment = bytes to copy down to a boundary */
742
743 /* align destination with byte copies */
744 ldrb r3, [r1, #-1]!
745 strb r3, [r0, #-1]!
746 ldrgeb r3, [r1, #-1]!
747 strgeb r3, [r0, #-1]!
748 ldrgtb r3, [r1, #-1]!
749 strgtb r3, [r0, #-1]!
750 subs r2, r2, r12
751 blt .Lmemmove_bl4 /* less than 4 bytes to go */
752 ands r12, r1, #3
753 beq .Lmemmove_bt8 /* we have an aligned source */
754
755 /* erg - unaligned source */
756 /* This is where it gets nasty ... */
/* Same word-reassembly technique as the forward path, but reading
 * downwards; r3 holds the pre-loaded aligned word here. */
757 .Lmemmove_bsrcul:
758 bic r1, r1, #3
759 ldr r3, [r1, #0]
760 cmp r12, #2
761 blt .Lmemmove_bsrcul1
762 beq .Lmemmove_bsrcul2
763 cmp r2, #0x0c
764 blt .Lmemmove_bsrcul3loop4
765 sub r2, r2, #0x0c
766 stmdb sp!, {r4, r5, lr}
767
768 .Lmemmove_bsrcul3loop16:
769 #ifdef __ARMEB__
770 mov lr, r3, lsr #8
771 #else
772 mov lr, r3, lsl #8
773 #endif
774 ldmdb r1!, {r3-r5, r12}
775 #ifdef __ARMEB__
776 orr lr, lr, r12, lsl #24
777 mov r12, r12, lsr #8
778 orr r12, r12, r5, lsl #24
779 mov r5, r5, lsr #8
780 orr r5, r5, r4, lsl #24
781 mov r4, r4, lsr #8
782 orr r4, r4, r3, lsl #24
783 #else
784 orr lr, lr, r12, lsr #24
785 mov r12, r12, lsl #8
786 orr r12, r12, r5, lsr #24
787 mov r5, r5, lsl #8
788 orr r5, r5, r4, lsr #24
789 mov r4, r4, lsl #8
790 orr r4, r4, r3, lsr #24
791 #endif
792 stmdb r0!, {r4, r5, r12, lr}
793 subs r2, r2, #0x10
794 bge .Lmemmove_bsrcul3loop16
795 ldmia sp!, {r4, r5, lr}
796 adds r2, r2, #0x0c
797 blt .Lmemmove_bsrcul3l4
798
799 .Lmemmove_bsrcul3loop4:
800 #ifdef __ARMEB__
801 mov r12, r3, lsr #8
802 #else
803 mov r12, r3, lsl #8
804 #endif
805 ldr r3, [r1, #-4]!
806 #ifdef __ARMEB__
807 orr r12, r12, r3, lsl #24
808 #else
809 orr r12, r12, r3, lsr #24
810 #endif
811 str r12, [r0, #-4]!
812 subs r2, r2, #4
813 bge .Lmemmove_bsrcul3loop4
814
815 .Lmemmove_bsrcul3l4:
816 add r1, r1, #3 /* restore the true (misaligned) byte position */
817 b .Lmemmove_bl4
818
819 .Lmemmove_bsrcul2:
820 cmp r2, #0x0c
821 blt .Lmemmove_bsrcul2loop4
822 sub r2, r2, #0x0c
823 stmdb sp!, {r4, r5, lr}
824
825 .Lmemmove_bsrcul2loop16:
826 #ifdef __ARMEB__
827 mov lr, r3, lsr #16
828 #else
829 mov lr, r3, lsl #16
830 #endif
831 ldmdb r1!, {r3-r5, r12}
832 #ifdef __ARMEB__
833 orr lr, lr, r12, lsl #16
834 mov r12, r12, lsr #16
835 orr r12, r12, r5, lsl #16
836 mov r5, r5, lsr #16
837 orr r5, r5, r4, lsl #16
838 mov r4, r4, lsr #16
839 orr r4, r4, r3, lsl #16
840 #else
841 orr lr, lr, r12, lsr #16
842 mov r12, r12, lsl #16
843 orr r12, r12, r5, lsr #16
844 mov r5, r5, lsl #16
845 orr r5, r5, r4, lsr #16
846 mov r4, r4, lsl #16
847 orr r4, r4, r3, lsr #16
848 #endif
849 stmdb r0!, {r4, r5, r12, lr}
850 subs r2, r2, #0x10
851 bge .Lmemmove_bsrcul2loop16
852 ldmia sp!, {r4, r5, lr}
853 adds r2, r2, #0x0c
854 blt .Lmemmove_bsrcul2l4
855
856 .Lmemmove_bsrcul2loop4:
857 #ifdef __ARMEB__
858 mov r12, r3, lsr #16
859 #else
860 mov r12, r3, lsl #16
861 #endif
862 ldr r3, [r1, #-4]!
863 #ifdef __ARMEB__
864 orr r12, r12, r3, lsl #16
865 #else
866 orr r12, r12, r3, lsr #16
867 #endif
868 str r12, [r0, #-4]!
869 subs r2, r2, #4
870 bge .Lmemmove_bsrcul2loop4
871
872 .Lmemmove_bsrcul2l4:
873 add r1, r1, #2 /* restore the true (misaligned) byte position */
874 b .Lmemmove_bl4
875
876 .Lmemmove_bsrcul1:
877 cmp r2, #0x0c
878 blt .Lmemmove_bsrcul1loop4
879 sub r2, r2, #0x0c
880 stmdb sp!, {r4, r5, lr}
881
882 .Lmemmove_bsrcul1loop32:
883 #ifdef __ARMEB__
884 mov lr, r3, lsr #24
885 #else
886 mov lr, r3, lsl #24
887 #endif
888 ldmdb r1!, {r3-r5, r12}
889 #ifdef __ARMEB__
890 orr lr, lr, r12, lsl #8
891 mov r12, r12, lsr #24
892 orr r12, r12, r5, lsl #8
893 mov r5, r5, lsr #24
894 orr r5, r5, r4, lsl #8
895 mov r4, r4, lsr #24
896 orr r4, r4, r3, lsl #8
897 #else
898 orr lr, lr, r12, lsr #8
899 mov r12, r12, lsl #24
900 orr r12, r12, r5, lsr #8
901 mov r5, r5, lsl #24
902 orr r5, r5, r4, lsr #8
903 mov r4, r4, lsl #24
904 orr r4, r4, r3, lsr #8
905 #endif
906 stmdb r0!, {r4, r5, r12, lr}
907 subs r2, r2, #0x10
908 bge .Lmemmove_bsrcul1loop32
909 ldmia sp!, {r4, r5, lr}
910 adds r2, r2, #0x0c
911 blt .Lmemmove_bsrcul1l4
912
913 .Lmemmove_bsrcul1loop4:
914 #ifdef __ARMEB__
915 mov r12, r3, lsr #24
916 #else
917 mov r12, r3, lsl #24
918 #endif
919 ldr r3, [r1, #-4]!
920 #ifdef __ARMEB__
921 orr r12, r12, r3, lsl #8
922 #else
923 orr r12, r12, r3, lsr #8
924 #endif
925 str r12, [r0, #-4]!
926 subs r2, r2, #4
927 bge .Lmemmove_bsrcul1loop4
928
929 .Lmemmove_bsrcul1l4:
930 add r1, r1, #1 /* restore the true (misaligned) byte position */
931 b .Lmemmove_bl4
932
933 #if !defined(_ARM_ARCH_5E)
/*
 * void *memcpy(void *dst, const void *src, size_t len)  -- ARMv4 path
 * In:  r0 = dst, r1 = src, r2 = len.  Out: r0 = dst.
 * Standard memcpy contract: regions must not overlap (memmove above
 * handles overlap).  For large copies an installed _arm_memcpy hook
 * is tried first -- except when the pc shows we are executing from
 * flash (FLASHADDR window test).  Generic path: lr is saved because
 * it is used as a copy register; r4 (and r5) are borrowed and saved
 * when the wider loops need them.
 */
934 ENTRY(memcpy)
935 /* save leaf functions having to store this away */
936 /* Do not check arm_memcpy if we're running from flash */
937 #ifdef FLASHADDR
938 #if FLASHADDR > PHYSADDR
939 ldr r3, =FLASHADDR
940 cmp r3, pc
941 bls .Lnormal
942 #else
943 ldr r3, =FLASHADDR
944 cmp r3, pc
945 bhi .Lnormal
946 #endif
947 #endif
948 ldr r3, .L_arm_memcpy
949 ldr r3, [r3] /* r3 = installed hook, or NULL */
950 cmp r3, #0
951 beq .Lnormal
952 ldr r3, .L_min_memcpy_size
953 ldr r3, [r3]
954 cmp r2, r3 /* NOTE(review): signed compare (blt) -- assumes len < 2^31 */
955 blt .Lnormal
956 stmfd sp!, {r0-r2, r4, lr}
957 mov r3, #0 /* fourth hook argument = 0 */
958 ldr r4, .L_arm_memcpy
959 mov lr, pc
960 ldr pc, [r4] /* call hook; 0 return => request handled */
961 cmp r0, #0
962 ldmfd sp!, {r0-r2, r4, lr}
963 RETeq
964
965 .Lnormal:
966 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
967
968 subs r2, r2, #4
969 blt .Lmemcpy_l4 /* less than 4 bytes */
970 ands r12, r0, #3
971 bne .Lmemcpy_destul /* oh unaligned destination addr */
972 ands r12, r1, #3
973 bne .Lmemcpy_srcul /* oh unaligned source addr */
974
975 .Lmemcpy_t8:
976 /* We have aligned source and destination */
977 subs r2, r2, #8
978 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
979 subs r2, r2, #0x14
980 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
981 stmdb sp!, {r4} /* borrow r4 */
982
983 /* blat 32 bytes at a time */
984 /* XXX for really big copies perhaps we should use more registers */
985 .Lmemcpy_loop32:
986 ldmia r1!, {r3, r4, r12, lr}
987 stmia r0!, {r3, r4, r12, lr}
988 ldmia r1!, {r3, r4, r12, lr}
989 stmia r0!, {r3, r4, r12, lr}
990 subs r2, r2, #0x20
991 bge .Lmemcpy_loop32
992
993 cmn r2, #0x10
994 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
995 stmgeia r0!, {r3, r4, r12, lr}
996 subge r2, r2, #0x10
997 ldmia sp!, {r4} /* return r4 */
998
999 .Lmemcpy_l32:
1000 adds r2, r2, #0x14
1001
1002 /* blat 12 bytes at a time */
1003 .Lmemcpy_loop12:
1004 ldmgeia r1!, {r3, r12, lr}
1005 stmgeia r0!, {r3, r12, lr}
1006 subges r2, r2, #0x0c
1007 bge .Lmemcpy_loop12
1008
1009 .Lmemcpy_l12:
1010 adds r2, r2, #8
1011 blt .Lmemcpy_l4
1012
1013 subs r2, r2, #4
1014 ldrlt r3, [r1], #4
1015 strlt r3, [r0], #4
1016 ldmgeia r1!, {r3, r12}
1017 stmgeia r0!, {r3, r12}
1018 subge r2, r2, #4
1019
1020 .Lmemcpy_l4:
1021 /* less than 4 bytes to go */
1022 adds r2, r2, #4
1023 #ifdef __APCS_26_
1024 ldmeqia sp!, {r0, pc}^ /* done */
1025 #else
1026 ldmeqia sp!, {r0, pc} /* done */
1027 #endif
1028 /* copy the crud byte at a time */
1029 cmp r2, #2
1030 ldrb r3, [r1], #1
1031 strb r3, [r0], #1
1032 ldrgeb r3, [r1], #1
1033 strgeb r3, [r0], #1
1034 ldrgtb r3, [r1], #1
1035 strgtb r3, [r0], #1
1036 ldmia sp!, {r0, pc}
1037
1038 /* erg - unaligned destination */
1039 .Lmemcpy_destul:
1040 rsb r12, r12, #4 /* r12 = bytes to reach word alignment (1..3) */
1041 cmp r12, #2
1042
1043 /* align destination with byte copies */
1044 ldrb r3, [r1], #1
1045 strb r3, [r0], #1
1046 ldrgeb r3, [r1], #1
1047 strgeb r3, [r0], #1
1048 ldrgtb r3, [r1], #1
1049 strgtb r3, [r0], #1
1050 subs r2, r2, r12
1051 blt .Lmemcpy_l4 /* less the 4 bytes */
1052
1053 ands r12, r1, #3
1054 beq .Lmemcpy_t8 /* we have an aligned source */
1055
1056 /* erg - unaligned source */
1057 /* This is where it gets nasty ... */
/*
 * Word-reassembly for a misaligned source: round r1 down, pre-load
 * one aligned word into lr, then merge successive aligned words with
 * lsr/lsl pairs chosen by the 1/2/3-byte offset (r12 on entry).
 * NOTE(review): unlike memmove above, this path has no __ARMEB__
 * variants -- it appears to assume little-endian byte order; confirm.
 */
1058 .Lmemcpy_srcul:
1059 bic r1, r1, #3
1060 ldr lr, [r1], #4
1061 cmp r12, #2
1062 bgt .Lmemcpy_srcul3
1063 beq .Lmemcpy_srcul2
1064 cmp r2, #0x0c
1065 blt .Lmemcpy_srcul1loop4
1066 sub r2, r2, #0x0c
1067 stmdb sp!, {r4, r5}
1068
1069 .Lmemcpy_srcul1loop16:
1070 mov r3, lr, lsr #8
1071 ldmia r1!, {r4, r5, r12, lr}
1072 orr r3, r3, r4, lsl #24
1073 mov r4, r4, lsr #8
1074 orr r4, r4, r5, lsl #24
1075 mov r5, r5, lsr #8
1076 orr r5, r5, r12, lsl #24
1077 mov r12, r12, lsr #8
1078 orr r12, r12, lr, lsl #24
1079 stmia r0!, {r3-r5, r12}
1080 subs r2, r2, #0x10
1081 bge .Lmemcpy_srcul1loop16
1082 ldmia sp!, {r4, r5}
1083 adds r2, r2, #0x0c
1084 blt .Lmemcpy_srcul1l4
1085
1086 .Lmemcpy_srcul1loop4:
1087 mov r12, lr, lsr #8
1088 ldr lr, [r1], #4
1089 orr r12, r12, lr, lsl #24
1090 str r12, [r0], #4
1091 subs r2, r2, #4
1092 bge .Lmemcpy_srcul1loop4
1093
1094 .Lmemcpy_srcul1l4:
1095 sub r1, r1, #3 /* rewind to the true (misaligned) byte position */
1096 b .Lmemcpy_l4
1097
1098 .Lmemcpy_srcul2:
1099 cmp r2, #0x0c
1100 blt .Lmemcpy_srcul2loop4
1101 sub r2, r2, #0x0c
1102 stmdb sp!, {r4, r5}
1103
1104 .Lmemcpy_srcul2loop16:
1105 mov r3, lr, lsr #16
1106 ldmia r1!, {r4, r5, r12, lr}
1107 orr r3, r3, r4, lsl #16
1108 mov r4, r4, lsr #16
1109 orr r4, r4, r5, lsl #16
1110 mov r5, r5, lsr #16
1111 orr r5, r5, r12, lsl #16
1112 mov r12, r12, lsr #16
1113 orr r12, r12, lr, lsl #16
1114 stmia r0!, {r3-r5, r12}
1115 subs r2, r2, #0x10
1116 bge .Lmemcpy_srcul2loop16
1117 ldmia sp!, {r4, r5}
1118 adds r2, r2, #0x0c
1119 blt .Lmemcpy_srcul2l4
1120
1121 .Lmemcpy_srcul2loop4:
1122 mov r12, lr, lsr #16
1123 ldr lr, [r1], #4
1124 orr r12, r12, lr, lsl #16
1125 str r12, [r0], #4
1126 subs r2, r2, #4
1127 bge .Lmemcpy_srcul2loop4
1128
1129 .Lmemcpy_srcul2l4:
1130 sub r1, r1, #2 /* rewind to the true (misaligned) byte position */
1131 b .Lmemcpy_l4
1132
1133 .Lmemcpy_srcul3:
1134 cmp r2, #0x0c
1135 blt .Lmemcpy_srcul3loop4
1136 sub r2, r2, #0x0c
1137 stmdb sp!, {r4, r5}
1138
1139 .Lmemcpy_srcul3loop16:
1140 mov r3, lr, lsr #24
1141 ldmia r1!, {r4, r5, r12, lr}
1142 orr r3, r3, r4, lsl #8
1143 mov r4, r4, lsr #24
1144 orr r4, r4, r5, lsl #8
1145 mov r5, r5, lsr #24
1146 orr r5, r5, r12, lsl #8
1147 mov r12, r12, lsr #24
1148 orr r12, r12, lr, lsl #8
1149 stmia r0!, {r3-r5, r12}
1150 subs r2, r2, #0x10
1151 bge .Lmemcpy_srcul3loop16
1152 ldmia sp!, {r4, r5}
1153 adds r2, r2, #0x0c
1154 blt .Lmemcpy_srcul3l4
1155
1156 .Lmemcpy_srcul3loop4:
1157 mov r12, lr, lsr #24
1158 ldr lr, [r1], #4
1159 orr r12, r12, lr, lsl #8
1160 str r12, [r0], #4
1161 subs r2, r2, #4
1162 bge .Lmemcpy_srcul3loop4
1163
1164 .Lmemcpy_srcul3l4:
1165 sub r1, r1, #1 /* rewind to the true (misaligned) byte position */
1166 b .Lmemcpy_l4
1167 #else
1168 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1169 ENTRY(memcpy)
1170 pld [r1]
1171 cmp r2, #0x0c
1172 ble .Lmemcpy_short /* <= 12 bytes */
1173 #ifdef FLASHADDR
1174 #if FLASHADDR > PHYSADDR
1175 ldr r3, =FLASHADDR
1176 cmp r3, pc
1177 bls .Lnormal
1178 #else
1179 ldr r3, =FLASHADDR
1180 cmp r3, pc
1181 bhi .Lnormal
1182 #endif
1183 #endif
1184 ldr r3, .L_arm_memcpy
1185 ldr r3, [r3]
1186 cmp r3, #0
1187 beq .Lnormal
1188 ldr r3, .L_min_memcpy_size
1189 ldr r3, [r3]
1190 cmp r2, r3
1191 blt .Lnormal
1192 stmfd sp!, {r0-r2, r4, lr}
1193 mov r3, #0
1194 ldr r4, .L_arm_memcpy
1195 mov lr, pc
1196 ldr pc, [r4]
1197 cmp r0, #0
1198 ldmfd sp!, {r0-r2, r4, lr}
1199 RETeq
1200 .Lnormal:
1201 mov r3, r0 /* We must not clobber r0 */
1202
1203 /* Word-align the destination buffer */
1204 ands ip, r3, #0x03 /* Already word aligned? */
1205 beq .Lmemcpy_wordaligned /* Yup */
1206 cmp ip, #0x02
1207 ldrb ip, [r1], #0x01
1208 sub r2, r2, #0x01
1209 strb ip, [r3], #0x01
1210 ldrleb ip, [r1], #0x01
1211 suble r2, r2, #0x01
1212 strleb ip, [r3], #0x01
1213 ldrltb ip, [r1], #0x01
1214 sublt r2, r2, #0x01
1215 strltb ip, [r3], #0x01
1216
1217 /* Destination buffer is now word aligned */
1218 .Lmemcpy_wordaligned:
1219 ands ip, r1, #0x03 /* Is src also word-aligned? */
1220 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1221
1222 /* Quad-align the destination buffer */
1223 tst r3, #0x07 /* Already quad aligned? */
1224 ldrne ip, [r1], #0x04
1225 stmfd sp!, {r4-r9} /* Free up some registers */
1226 subne r2, r2, #0x04
1227 strne ip, [r3], #0x04
1228
1229 /* Destination buffer quad aligned, source is at least word aligned */
1230 subs r2, r2, #0x80
1231 blt .Lmemcpy_w_lessthan128
1232
1233 /* Copy 128 bytes at a time */
1234 .Lmemcpy_w_loop128:
1235 ldr r4, [r1], #0x04 /* LD:00-03 */
1236 ldr r5, [r1], #0x04 /* LD:04-07 */
1237 pld [r1, #0x18] /* Prefetch 0x20 */
1238 ldr r6, [r1], #0x04 /* LD:08-0b */
1239 ldr r7, [r1], #0x04 /* LD:0c-0f */
1240 ldr r8, [r1], #0x04 /* LD:10-13 */
1241 ldr r9, [r1], #0x04 /* LD:14-17 */
1242 strd r4, [r3], #0x08 /* ST:00-07 */
1243 ldr r4, [r1], #0x04 /* LD:18-1b */
1244 ldr r5, [r1], #0x04 /* LD:1c-1f */
1245 strd r6, [r3], #0x08 /* ST:08-0f */
1246 ldr r6, [r1], #0x04 /* LD:20-23 */
1247 ldr r7, [r1], #0x04 /* LD:24-27 */
1248 pld [r1, #0x18] /* Prefetch 0x40 */
1249 strd r8, [r3], #0x08 /* ST:10-17 */
1250 ldr r8, [r1], #0x04 /* LD:28-2b */
1251 ldr r9, [r1], #0x04 /* LD:2c-2f */
1252 strd r4, [r3], #0x08 /* ST:18-1f */
1253 ldr r4, [r1], #0x04 /* LD:30-33 */
1254 ldr r5, [r1], #0x04 /* LD:34-37 */
1255 strd r6, [r3], #0x08 /* ST:20-27 */
1256 ldr r6, [r1], #0x04 /* LD:38-3b */
1257 ldr r7, [r1], #0x04 /* LD:3c-3f */
1258 strd r8, [r3], #0x08 /* ST:28-2f */
1259 ldr r8, [r1], #0x04 /* LD:40-43 */
1260 ldr r9, [r1], #0x04 /* LD:44-47 */
1261 pld [r1, #0x18] /* Prefetch 0x60 */
1262 strd r4, [r3], #0x08 /* ST:30-37 */
1263 ldr r4, [r1], #0x04 /* LD:48-4b */
1264 ldr r5, [r1], #0x04 /* LD:4c-4f */
1265 strd r6, [r3], #0x08 /* ST:38-3f */
1266 ldr r6, [r1], #0x04 /* LD:50-53 */
1267 ldr r7, [r1], #0x04 /* LD:54-57 */
1268 strd r8, [r3], #0x08 /* ST:40-47 */
1269 ldr r8, [r1], #0x04 /* LD:58-5b */
1270 ldr r9, [r1], #0x04 /* LD:5c-5f */
1271 strd r4, [r3], #0x08 /* ST:48-4f */
1272 ldr r4, [r1], #0x04 /* LD:60-63 */
1273 ldr r5, [r1], #0x04 /* LD:64-67 */
1274 pld [r1, #0x18] /* Prefetch 0x80 */
1275 strd r6, [r3], #0x08 /* ST:50-57 */
1276 ldr r6, [r1], #0x04 /* LD:68-6b */
1277 ldr r7, [r1], #0x04 /* LD:6c-6f */
1278 strd r8, [r3], #0x08 /* ST:58-5f */
1279 ldr r8, [r1], #0x04 /* LD:70-73 */
1280 ldr r9, [r1], #0x04 /* LD:74-77 */
1281 strd r4, [r3], #0x08 /* ST:60-67 */
1282 ldr r4, [r1], #0x04 /* LD:78-7b */
1283 ldr r5, [r1], #0x04 /* LD:7c-7f */
1284 strd r6, [r3], #0x08 /* ST:68-6f */
1285 strd r8, [r3], #0x08 /* ST:70-77 */
1286 subs r2, r2, #0x80
1287 strd r4, [r3], #0x08 /* ST:78-7f */
1288 bge .Lmemcpy_w_loop128
1289
1290 .Lmemcpy_w_lessthan128:
1291 adds r2, r2, #0x80 /* Adjust for extra sub */
1292 ldmeqfd sp!, {r4-r9}
1293 RETeq /* Return now if done */
1294 subs r2, r2, #0x20
1295 blt .Lmemcpy_w_lessthan32
1296
1297 /* Copy 32 bytes at a time */
1298 .Lmemcpy_w_loop32:
1299 ldr r4, [r1], #0x04
1300 ldr r5, [r1], #0x04
1301 pld [r1, #0x18]
1302 ldr r6, [r1], #0x04
1303 ldr r7, [r1], #0x04
1304 ldr r8, [r1], #0x04
1305 ldr r9, [r1], #0x04
1306 strd r4, [r3], #0x08
1307 ldr r4, [r1], #0x04
1308 ldr r5, [r1], #0x04
1309 strd r6, [r3], #0x08
1310 strd r8, [r3], #0x08
1311 subs r2, r2, #0x20
1312 strd r4, [r3], #0x08
1313 bge .Lmemcpy_w_loop32
1314
1315 .Lmemcpy_w_lessthan32:
1316         adds    r2, r2, #0x20           /* Adjust for extra sub */
1317         ldmeqfd sp!, {r4-r9}            /* restore saved regs if nothing left */
1318         RETeq                           /* Return now if done */
1319 
1320         and     r4, r2, #0x18           /* r4 = remaining, rounded down to 8 (0..24) */
1321         rsbs    r4, r4, #0x18           /* r4 = 24 - that; Z set when 24+ bytes remain */
1322         addne   pc, pc, r4, lsl #1      /* pc reads as '.'+8: skip one 4-insn (16-byte) group per missing 8 bytes */
1323         nop
1324 
1325         /* At least 24 bytes remaining */
1326         ldr     r4, [r1], #0x04
1327         ldr     r5, [r1], #0x04
1328         sub     r2, r2, #0x08
1329         strd    r4, [r3], #0x08         /* store the r4/r5 pair */
1330 
1331         /* At least 16 bytes remaining */
1332         ldr     r4, [r1], #0x04
1333         ldr     r5, [r1], #0x04
1334         sub     r2, r2, #0x08
1335         strd    r4, [r3], #0x08
1336 
1337         /* At least 8 bytes remaining */
1338         ldr     r4, [r1], #0x04
1339         ldr     r5, [r1], #0x04
1340         subs    r2, r2, #0x08
1341         strd    r4, [r3], #0x08
1342 
1343         /* Less than 8 bytes remaining */
1344         ldmfd   sp!, {r4-r9}
1345         RETeq                           /* Return now if done */
1346         subs    r2, r2, #0x04
1347         ldrge   ip, [r1], #0x04         /* copy one more word if >= 4 bytes left */
1348         strge   ip, [r3], #0x04
1349         RETeq                           /* Return now if done */
1350         addlt   r2, r2, #0x04           /* r2 = trailing byte count (1..3) */
1351         ldrb    ip, [r1], #0x01
1352         cmp     r2, #0x02
1353         ldrgeb  r2, [r1], #0x01         /* count no longer needed: reuse r2 as data */
1354         strb    ip, [r3], #0x01
1355         ldrgtb  ip, [r1]
1356         strgeb  r2, [r3], #0x01
1357         strgtb  ip, [r3]
1358         RET
1359
1360
1361 /*
1362  * At this point, it has not been possible to word align both buffers.
1363  * The destination buffer is word aligned, but the source buffer is not.
1364  */
1365 .Lmemcpy_bad_align:                     /* on entry: ip = src & 3 (1, 2 or 3) */
1366         stmfd   sp!, {r4-r7}
1367         bic     r1, r1, #0x03           /* round src down to a word boundary */
1368         cmp     ip, #2
1369         ldr     ip, [r1], #0x04         /* preload first aligned word; cmp flags survive */
1370         bgt     .Lmemcpy_bad3           /* src & 3 == 3 */
1371         beq     .Lmemcpy_bad2           /* src & 3 == 2 */
1372         b       .Lmemcpy_bad1           /* src & 3 == 1 */
1373
1374 .Lmemcpy_bad1_loop16:
1375 #ifdef __ARMEB__
1376 mov r4, ip, lsl #8
1377 #else
1378 mov r4, ip, lsr #8
1379 #endif
1380 ldr r5, [r1], #0x04
1381 pld [r1, #0x018]
1382 ldr r6, [r1], #0x04
1383 ldr r7, [r1], #0x04
1384 ldr ip, [r1], #0x04
1385 #ifdef __ARMEB__
1386 orr r4, r4, r5, lsr #24
1387 mov r5, r5, lsl #8
1388 orr r5, r5, r6, lsr #24
1389 mov r6, r6, lsl #8
1390 orr r6, r6, r7, lsr #24
1391 mov r7, r7, lsl #8
1392 orr r7, r7, ip, lsr #24
1393 #else
1394 orr r4, r4, r5, lsl #24
1395 mov r5, r5, lsr #8
1396 orr r5, r5, r6, lsl #24
1397 mov r6, r6, lsr #8
1398 orr r6, r6, r7, lsl #24
1399 mov r7, r7, lsr #8
1400 orr r7, r7, ip, lsl #24
1401 #endif
1402 str r4, [r3], #0x04
1403 str r5, [r3], #0x04
1404 str r6, [r3], #0x04
1405 str r7, [r3], #0x04
1406 .Lmemcpy_bad1:
1407 subs r2, r2, #0x10
1408 bge .Lmemcpy_bad1_loop16
1409
1410 adds r2, r2, #0x10
1411 ldmeqfd sp!, {r4-r7}
1412 RETeq /* Return now if done */
1413 subs r2, r2, #0x04
1414 sublt r1, r1, #0x03
1415 blt .Lmemcpy_bad_done
1416
1417 .Lmemcpy_bad1_loop4:
1418 #ifdef __ARMEB__
1419 mov r4, ip, lsl #8
1420 #else
1421 mov r4, ip, lsr #8
1422 #endif
1423 ldr ip, [r1], #0x04
1424 subs r2, r2, #0x04
1425 #ifdef __ARMEB__
1426 orr r4, r4, ip, lsr #24
1427 #else
1428 orr r4, r4, ip, lsl #24
1429 #endif
1430 str r4, [r3], #0x04
1431 bge .Lmemcpy_bad1_loop4
1432 sub r1, r1, #0x03
1433 b .Lmemcpy_bad_done
1434
1435 .Lmemcpy_bad2_loop16:
1436 #ifdef __ARMEB__
1437 mov r4, ip, lsl #16
1438 #else
1439 mov r4, ip, lsr #16
1440 #endif
1441 ldr r5, [r1], #0x04
1442 pld [r1, #0x018]
1443 ldr r6, [r1], #0x04
1444 ldr r7, [r1], #0x04
1445 ldr ip, [r1], #0x04
1446 #ifdef __ARMEB__
1447 orr r4, r4, r5, lsr #16
1448 mov r5, r5, lsl #16
1449 orr r5, r5, r6, lsr #16
1450 mov r6, r6, lsl #16
1451 orr r6, r6, r7, lsr #16
1452 mov r7, r7, lsl #16
1453 orr r7, r7, ip, lsr #16
1454 #else
1455 orr r4, r4, r5, lsl #16
1456 mov r5, r5, lsr #16
1457 orr r5, r5, r6, lsl #16
1458 mov r6, r6, lsr #16
1459 orr r6, r6, r7, lsl #16
1460 mov r7, r7, lsr #16
1461 orr r7, r7, ip, lsl #16
1462 #endif
1463 str r4, [r3], #0x04
1464 str r5, [r3], #0x04
1465 str r6, [r3], #0x04
1466 str r7, [r3], #0x04
1467 .Lmemcpy_bad2:
1468 subs r2, r2, #0x10
1469 bge .Lmemcpy_bad2_loop16
1470
1471 adds r2, r2, #0x10
1472 ldmeqfd sp!, {r4-r7}
1473 RETeq /* Return now if done */
1474 subs r2, r2, #0x04
1475 sublt r1, r1, #0x02
1476 blt .Lmemcpy_bad_done
1477
1478 .Lmemcpy_bad2_loop4:
1479 #ifdef __ARMEB__
1480 mov r4, ip, lsl #16
1481 #else
1482 mov r4, ip, lsr #16
1483 #endif
1484 ldr ip, [r1], #0x04
1485 subs r2, r2, #0x04
1486 #ifdef __ARMEB__
1487 orr r4, r4, ip, lsr #16
1488 #else
1489 orr r4, r4, ip, lsl #16
1490 #endif
1491 str r4, [r3], #0x04
1492 bge .Lmemcpy_bad2_loop4
1493 sub r1, r1, #0x02
1494 b .Lmemcpy_bad_done
1495
1496 .Lmemcpy_bad3_loop16:
1497 #ifdef __ARMEB__
1498 mov r4, ip, lsl #24
1499 #else
1500 mov r4, ip, lsr #24
1501 #endif
1502 ldr r5, [r1], #0x04
1503 pld [r1, #0x018]
1504 ldr r6, [r1], #0x04
1505 ldr r7, [r1], #0x04
1506 ldr ip, [r1], #0x04
1507 #ifdef __ARMEB__
1508 orr r4, r4, r5, lsr #8
1509 mov r5, r5, lsl #24
1510 orr r5, r5, r6, lsr #8
1511 mov r6, r6, lsl #24
1512 orr r6, r6, r7, lsr #8
1513 mov r7, r7, lsl #24
1514 orr r7, r7, ip, lsr #8
1515 #else
1516 orr r4, r4, r5, lsl #8
1517 mov r5, r5, lsr #24
1518 orr r5, r5, r6, lsl #8
1519 mov r6, r6, lsr #24
1520 orr r6, r6, r7, lsl #8
1521 mov r7, r7, lsr #24
1522 orr r7, r7, ip, lsl #8
1523 #endif
1524 str r4, [r3], #0x04
1525 str r5, [r3], #0x04
1526 str r6, [r3], #0x04
1527 str r7, [r3], #0x04
1528 .Lmemcpy_bad3:
1529 subs r2, r2, #0x10
1530 bge .Lmemcpy_bad3_loop16
1531
1532 adds r2, r2, #0x10
1533 ldmeqfd sp!, {r4-r7}
1534 RETeq /* Return now if done */
1535 subs r2, r2, #0x04
1536 sublt r1, r1, #0x01
1537 blt .Lmemcpy_bad_done
1538
1539 .Lmemcpy_bad3_loop4:
1540 #ifdef __ARMEB__
1541 mov r4, ip, lsl #24
1542 #else
1543 mov r4, ip, lsr #24
1544 #endif
1545 ldr ip, [r1], #0x04
1546 subs r2, r2, #0x04
1547 #ifdef __ARMEB__
1548 orr r4, r4, ip, lsr #8
1549 #else
1550 orr r4, r4, ip, lsl #8
1551 #endif
1552 str r4, [r3], #0x04
1553 bge .Lmemcpy_bad3_loop4
1554 sub r1, r1, #0x01
1555
1556 .Lmemcpy_bad_done:
1557 ldmfd sp!, {r4-r7}
1558 adds r2, r2, #0x04
1559 RETeq
1560 ldrb ip, [r1], #0x01
1561 cmp r2, #0x02
1562 ldrgeb r2, [r1], #0x01
1563 strb ip, [r3], #0x01
1564 ldrgtb ip, [r1]
1565 strgeb r2, [r3], #0x01
1566 strgtb ip, [r3]
1567 RET
1568
1569
1570 /*
1571 * Handle short copies (less than 16 bytes), possibly misaligned.
1572 * Some of these are *very* common, thanks to the network stack,
1573 * and so are handled specially.
1574 */
1575 .Lmemcpy_short:                         /* dispatch on byte count, 0 <= r2 <= 12 */
1576         add     pc, pc, r2, lsl #2      /* computed goto: pc reads as '.'+8, so len 0 lands on the RET below */
1577         nop
1578         RET                     /* 0x00 */
1579         b       .Lmemcpy_bytewise       /* 0x01 */
1580         b       .Lmemcpy_bytewise       /* 0x02 */
1581         b       .Lmemcpy_bytewise       /* 0x03 */
1582         b       .Lmemcpy_4              /* 0x04 */
1583         b       .Lmemcpy_bytewise       /* 0x05 */
1584         b       .Lmemcpy_6              /* 0x06 */
1585         b       .Lmemcpy_bytewise       /* 0x07 */
1586         b       .Lmemcpy_8              /* 0x08 */
1587         b       .Lmemcpy_bytewise       /* 0x09 */
1588         b       .Lmemcpy_bytewise       /* 0x0a */
1589         b       .Lmemcpy_bytewise       /* 0x0b */
1590         b       .Lmemcpy_c              /* 0x0c */
1591 .Lmemcpy_bytewise:                      /* generic short copy, one byte per iteration */
1592         mov     r3, r0                  /* We must not clobber r0 */
1593         ldrb    ip, [r1], #0x01
1594 1:      subs    r2, r2, #0x01           /* r2 > 0 on entry; loop until count reaches zero */
1595         strb    ip, [r3], #0x01
1596         ldrneb  ip, [r1], #0x01         /* fetch next byte only if more remain */
1597         bne     1b
1598         RET
1599
1600 /******************************************************************************
1601 * Special case for 4 byte copies
1602 */
1603 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1604 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1605 LMEMCPY_4_PAD
1606 .Lmemcpy_4:
1607 and r2, r1, #0x03
1608 orr r2, r2, r0, lsl #2
1609 ands r2, r2, #0x0f
1610 sub r3, pc, #0x14
1611 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1612
1613 /*
1614 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1615 */
1616 ldr r2, [r1]
1617 str r2, [r0]
1618 RET
1619 LMEMCPY_4_PAD
1620
1621 /*
1622 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1623 */
1624 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1625 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1626 #ifdef __ARMEB__
1627 mov r3, r3, lsl #8 /* r3 = 012. */
1628 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1629 #else
1630 mov r3, r3, lsr #8 /* r3 = .210 */
1631 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1632 #endif
1633 str r3, [r0]
1634 RET
1635 LMEMCPY_4_PAD
1636
1637 /*
1638 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1639 */
1640 #ifdef __ARMEB__
1641 ldrh r3, [r1]
1642 ldrh r2, [r1, #0x02]
1643 #else
1644 ldrh r3, [r1, #0x02]
1645 ldrh r2, [r1]
1646 #endif
1647 orr r3, r2, r3, lsl #16
1648 str r3, [r0]
1649 RET
1650 LMEMCPY_4_PAD
1651
1652 /*
1653 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1654 */
1655 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1656 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1657 #ifdef __ARMEB__
1658 mov r3, r3, lsl #24 /* r3 = 0... */
1659 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1660 #else
1661 mov r3, r3, lsr #24 /* r3 = ...0 */
1662 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1663 #endif
1664 str r3, [r0]
1665 RET
1666 LMEMCPY_4_PAD
1667
1668 /*
1669 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1670 */
1671 ldr r2, [r1]
1672 #ifdef __ARMEB__
1673 strb r2, [r0, #0x03]
1674 mov r3, r2, lsr #8
1675 mov r1, r2, lsr #24
1676 strb r1, [r0]
1677 #else
1678 strb r2, [r0]
1679 mov r3, r2, lsr #8
1680 mov r1, r2, lsr #24
1681 strb r1, [r0, #0x03]
1682 #endif
1683 strh r3, [r0, #0x01]
1684 RET
1685 LMEMCPY_4_PAD
1686
1687 /*
1688 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1689 */
1690 ldrb r2, [r1]
1691 ldrh r3, [r1, #0x01]
1692 ldrb r1, [r1, #0x03]
1693 strb r2, [r0]
1694 strh r3, [r0, #0x01]
1695 strb r1, [r0, #0x03]
1696 RET
1697 LMEMCPY_4_PAD
1698
1699 /*
1700  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1701  */
1702         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1703         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
1704 #ifdef __ARMEB__
1705         mov     r1, r2, lsr #8          /* r1 = ...0 */
1706         strb    r1, [r0]
1707         mov     r2, r2, lsl #8          /* r2 = .01. */
1708         orr     r2, r2, r3, lsr #8      /* r2 = .012 */
1709 #else
1710         strb    r2, [r0]
1711         mov     r2, r2, lsr #8          /* r2 = ...1 */
1712         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
1713         mov     r3, r3, lsr #8          /* r3 = ...3 */
1714 #endif
1715         strh    r2, [r0, #0x01]
1716         strb    r3, [r0, #0x03]
1717         RET
1718 LMEMCPY_4_PAD
1719
1720 /*
1721 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1722 */
1723 ldrb r2, [r1]
1724 ldrh r3, [r1, #0x01]
1725 ldrb r1, [r1, #0x03]
1726 strb r2, [r0]
1727 strh r3, [r0, #0x01]
1728 strb r1, [r0, #0x03]
1729 RET
1730 LMEMCPY_4_PAD
1731
1732 /*
1733 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1734 */
1735 ldr r2, [r1]
1736 #ifdef __ARMEB__
1737 strh r2, [r0, #0x02]
1738 mov r3, r2, lsr #16
1739 strh r3, [r0]
1740 #else
1741 strh r2, [r0]
1742 mov r3, r2, lsr #16
1743 strh r3, [r0, #0x02]
1744 #endif
1745 RET
1746 LMEMCPY_4_PAD
1747
1748 /*
1749 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1750 */
1751 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1752 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1753 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1754 strh r1, [r0]
1755 #ifdef __ARMEB__
1756 mov r2, r2, lsl #8 /* r2 = 012. */
1757 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1758 #else
1759 mov r2, r2, lsr #24 /* r2 = ...2 */
1760 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1761 #endif
1762 strh r2, [r0, #0x02]
1763 RET
1764 LMEMCPY_4_PAD
1765
1766 /*
1767 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1768 */
1769 ldrh r2, [r1]
1770 ldrh r3, [r1, #0x02]
1771 strh r2, [r0]
1772 strh r3, [r0, #0x02]
1773 RET
1774 LMEMCPY_4_PAD
1775
1776 /*
1777 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1778 */
1779 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1780 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1781 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1782 strh r1, [r0, #0x02]
1783 #ifdef __ARMEB__
1784 mov r3, r3, lsr #24 /* r3 = ...1 */
1785 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1786 #else
1787 mov r3, r3, lsl #8 /* r3 = 321. */
1788 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1789 #endif
1790 strh r3, [r0]
1791 RET
1792 LMEMCPY_4_PAD
1793
1794 /*
1795 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1796 */
1797 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1798 #ifdef __ARMEB__
1799 strb r2, [r0, #0x03]
1800 mov r3, r2, lsr #8
1801 mov r1, r2, lsr #24
1802 strh r3, [r0, #0x01]
1803 strb r1, [r0]
1804 #else
1805 strb r2, [r0]
1806 mov r3, r2, lsr #8
1807 mov r1, r2, lsr #24
1808 strh r3, [r0, #0x01]
1809 strb r1, [r0, #0x03]
1810 #endif
1811 RET
1812 LMEMCPY_4_PAD
1813
1814 /*
1815 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1816 */
1817 ldrb r2, [r1]
1818 ldrh r3, [r1, #0x01]
1819 ldrb r1, [r1, #0x03]
1820 strb r2, [r0]
1821 strh r3, [r0, #0x01]
1822 strb r1, [r0, #0x03]
1823 RET
1824 LMEMCPY_4_PAD
1825
1826 /*
1827 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1828 */
1829 #ifdef __ARMEB__
1830 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1831 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1832 strb r3, [r0, #0x03]
1833 mov r3, r3, lsr #8 /* r3 = ...2 */
1834 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1835 strh r3, [r0, #0x01]
1836 mov r2, r2, lsr #8 /* r2 = ...0 */
1837 strb r2, [r0]
1838 #else
1839 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1840 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1841 strb r2, [r0]
1842 mov r2, r2, lsr #8 /* r2 = ...1 */
1843 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1844 strh r2, [r0, #0x01]
1845 mov r3, r3, lsr #8 /* r3 = ...3 */
1846 strb r3, [r0, #0x03]
1847 #endif
1848 RET
1849 LMEMCPY_4_PAD
1850
1851 /*
1852 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1853 */
1854 ldrb r2, [r1]
1855 ldrh r3, [r1, #0x01]
1856 ldrb r1, [r1, #0x03]
1857 strb r2, [r0]
1858 strh r3, [r0, #0x01]
1859 strb r1, [r0, #0x03]
1860 RET
1861 LMEMCPY_4_PAD
1862
1863
1864 /******************************************************************************
1865 * Special case for 6 byte copies
1866 */
1867 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1868 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1869 LMEMCPY_6_PAD
1870 .Lmemcpy_6:
1871 and r2, r1, #0x03
1872 orr r2, r2, r0, lsl #2
1873 ands r2, r2, #0x0f
1874 sub r3, pc, #0x14
1875 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1876
1877 /*
1878 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1879 */
1880 ldr r2, [r1]
1881 ldrh r3, [r1, #0x04]
1882 str r2, [r0]
1883 strh r3, [r0, #0x04]
1884 RET
1885 LMEMCPY_6_PAD
1886
1887 /*
1888 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1889 */
1890 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1891 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1892 #ifdef __ARMEB__
1893 mov r2, r2, lsl #8 /* r2 = 012. */
1894 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1895 #else
1896 mov r2, r2, lsr #8 /* r2 = .210 */
1897 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1898 #endif
1899 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1900 str r2, [r0]
1901 strh r3, [r0, #0x04]
1902 RET
1903 LMEMCPY_6_PAD
1904
1905 /*
1906 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1907 */
1908 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1909 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1910 #ifdef __ARMEB__
1911 mov r1, r3, lsr #16 /* r1 = ..23 */
1912 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1913 str r1, [r0]
1914 strh r3, [r0, #0x04]
1915 #else
1916 mov r1, r3, lsr #16 /* r1 = ..54 */
1917 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1918 str r2, [r0]
1919 strh r1, [r0, #0x04]
1920 #endif
1921 RET
1922 LMEMCPY_6_PAD
1923
1924 /*
1925  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1926  */
1927         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
1928         ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
1929         ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
1930 #ifdef __ARMEB__
1931         mov     r2, r2, lsl #24         /* r2 = 0... */
1932         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
1933         mov     r3, r3, lsl #8          /* r3 = 234. */
1934         orr     r1, r3, r1, lsr #24     /* r1 = 2345 */
1935 #else
1936         mov     r2, r2, lsr #24         /* r2 = ...0 */
1937         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
1938         mov     r1, r1, lsl #8          /* r1 = xx5. */
1939         orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
1940 #endif
1941         str     r2, [r0]
1942         strh    r1, [r0, #0x04]
1943         RET
1944 LMEMCPY_6_PAD
1945
1946 /*
1947 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1948 */
1949 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1950 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1951 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1952 strh r1, [r0, #0x01]
1953 #ifdef __ARMEB__
1954 mov r1, r3, lsr #24 /* r1 = ...0 */
1955 strb r1, [r0]
1956 mov r3, r3, lsl #8 /* r3 = 123. */
1957 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1958 #else
1959 strb r3, [r0]
1960 mov r3, r3, lsr #24 /* r3 = ...3 */
1961 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1962 mov r2, r2, lsr #8 /* r2 = ...5 */
1963 #endif
1964 strh r3, [r0, #0x03]
1965 strb r2, [r0, #0x05]
1966 RET
1967 LMEMCPY_6_PAD
1968
1969 /*
1970 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1971 */
1972 ldrb r2, [r1]
1973 ldrh r3, [r1, #0x01]
1974 ldrh ip, [r1, #0x03]
1975 ldrb r1, [r1, #0x05]
1976 strb r2, [r0]
1977 strh r3, [r0, #0x01]
1978 strh ip, [r0, #0x03]
1979 strb r1, [r0, #0x05]
1980 RET
1981 LMEMCPY_6_PAD
1982
1983 /*
1984 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1985 */
1986 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1987 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1988 #ifdef __ARMEB__
1989 mov r3, r2, lsr #8 /* r3 = ...0 */
1990 strb r3, [r0]
1991 strb r1, [r0, #0x05]
1992 mov r3, r1, lsr #8 /* r3 = .234 */
1993 strh r3, [r0, #0x03]
1994 mov r3, r2, lsl #8 /* r3 = .01. */
1995 orr r3, r3, r1, lsr #24 /* r3 = .012 */
1996 strh r3, [r0, #0x01]
1997 #else
1998 strb r2, [r0]
1999 mov r3, r1, lsr #24
2000 strb r3, [r0, #0x05]
2001 mov r3, r1, lsr #8 /* r3 = .543 */
2002 strh r3, [r0, #0x03]
2003 mov r3, r2, lsr #8 /* r3 = ...1 */
2004 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
2005 strh r3, [r0, #0x01]
2006 #endif
2007 RET
2008 LMEMCPY_6_PAD
2009
2010 /*
2011 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2012 */
2013 ldrb r2, [r1]
2014 ldrh r3, [r1, #0x01]
2015 ldrh ip, [r1, #0x03]
2016 ldrb r1, [r1, #0x05]
2017 strb r2, [r0]
2018 strh r3, [r0, #0x01]
2019 strh ip, [r0, #0x03]
2020 strb r1, [r0, #0x05]
2021 RET
2022 LMEMCPY_6_PAD
2023
2024 /*
2025 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2026 */
2027 #ifdef __ARMEB__
2028 ldr r2, [r1] /* r2 = 0123 */
2029 ldrh r3, [r1, #0x04] /* r3 = ..45 */
2030 mov r1, r2, lsr #16 /* r1 = ..01 */
2031 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
2032 strh r1, [r0]
2033 str r3, [r0, #0x02]
2034 #else
2035 ldrh r2, [r1, #0x04] /* r2 = ..54 */
2036 ldr r3, [r1] /* r3 = 3210 */
2037 mov r2, r2, lsl #16 /* r2 = 54.. */
2038 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
2039 strh r3, [r0]
2040 str r2, [r0, #0x02]
2041 #endif
2042 RET
2043 LMEMCPY_6_PAD
2044
2045 /*
2046 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2047 */
2048 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2049 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
2050 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2051 #ifdef __ARMEB__
2052 mov r2, r2, lsr #8 /* r2 = .345 */
2053 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
2054 #else
2055 mov r2, r2, lsl #8 /* r2 = 543. */
2056 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
2057 #endif
2058 strh r1, [r0]
2059 str r2, [r0, #0x02]
2060 RET
2061 LMEMCPY_6_PAD
2062
2063 /*
2064 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2065 */
2066 ldrh r2, [r1]
2067 ldr r3, [r1, #0x02]
2068 strh r2, [r0]
2069 str r3, [r0, #0x02]
2070 RET
2071 LMEMCPY_6_PAD
2072
2073 /*
2074 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2075 */
2076 ldrb r3, [r1] /* r3 = ...0 */
2077 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2078 ldrb r1, [r1, #0x05] /* r1 = ...5 */
2079 #ifdef __ARMEB__
2080 mov r3, r3, lsl #8 /* r3 = ..0. */
2081 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
2082 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
2083 #else
2084 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2085 mov r1, r1, lsl #24 /* r1 = 5... */
2086 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
2087 #endif
2088 strh r3, [r0]
2089 str r1, [r0, #0x02]
2090 RET
2091 LMEMCPY_6_PAD
2092
2093 /*
2094 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2095 */
2096 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2097 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
2098 #ifdef __ARMEB__
2099 mov r3, r2, lsr #24 /* r3 = ...0 */
2100 strb r3, [r0]
2101 mov r2, r2, lsl #8 /* r2 = 123. */
2102 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2103 #else
2104 strb r2, [r0]
2105 mov r2, r2, lsr #8 /* r2 = .321 */
2106 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
2107 mov r1, r1, lsr #8 /* r1 = ...5 */
2108 #endif
2109 str r2, [r0, #0x01]
2110 strb r1, [r0, #0x05]
2111 RET
2112 LMEMCPY_6_PAD
2113
2114 /*
2115 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2116 */
2117 ldrb r2, [r1]
2118 ldrh r3, [r1, #0x01]
2119 ldrh ip, [r1, #0x03]
2120 ldrb r1, [r1, #0x05]
2121 strb r2, [r0]
2122 strh r3, [r0, #0x01]
2123 strh ip, [r0, #0x03]
2124 strb r1, [r0, #0x05]
2125 RET
2126 LMEMCPY_6_PAD
2127
2128 /*
2129 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2130 */
2131 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2132 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
2133 #ifdef __ARMEB__
2134 mov r3, r2, lsr #8 /* r3 = ...0 */
2135 strb r3, [r0]
2136 mov r2, r2, lsl #24 /* r2 = 1... */
2137 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2138 #else
2139 strb r2, [r0]
2140 mov r2, r2, lsr #8 /* r2 = ...1 */
2141 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
2142 mov r1, r1, lsr #24 /* r1 = ...5 */
2143 #endif
2144 str r2, [r0, #0x01]
2145 strb r1, [r0, #0x05]
2146 RET
2147 LMEMCPY_6_PAD
2148
2149 /*
2150 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2151 */
2152 ldrb r2, [r1]
2153 ldr r3, [r1, #0x01]
2154 ldrb r1, [r1, #0x05]
2155 strb r2, [r0]
2156 str r3, [r0, #0x01]
2157 strb r1, [r0, #0x05]
2158 RET
2159 LMEMCPY_6_PAD
2160
2161
2162 /******************************************************************************
2163 * Special case for 8 byte copies
2164 */
2165 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2166 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2167 LMEMCPY_8_PAD
2168 .Lmemcpy_8:
2169 and r2, r1, #0x03
2170 orr r2, r2, r0, lsl #2
2171 ands r2, r2, #0x0f
2172 sub r3, pc, #0x14
2173 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2174
2175 /*
2176 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2177 */
2178 ldr r2, [r1]
2179 ldr r3, [r1, #0x04]
2180 str r2, [r0]
2181 str r3, [r0, #0x04]
2182 RET
2183 LMEMCPY_8_PAD
2184
2185 /*
2186 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2187 */
2188 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2189 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2190 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2191 #ifdef __ARMEB__
2192 mov r3, r3, lsl #8 /* r3 = 012. */
2193 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2194 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2195 #else
2196 mov r3, r3, lsr #8 /* r3 = .210 */
2197 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2198 mov r1, r1, lsl #24 /* r1 = 7... */
2199 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2200 #endif
2201 str r3, [r0]
2202 str r2, [r0, #0x04]
2203 RET
2204 LMEMCPY_8_PAD
2205
2206 /*
2207 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2208 */
2209 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2210 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2211 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2212 #ifdef __ARMEB__
2213 mov r2, r2, lsl #16 /* r2 = 01.. */
2214 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2215 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2216 #else
2217 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2218 mov r3, r3, lsr #16 /* r3 = ..54 */
2219 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2220 #endif
2221 str r2, [r0]
2222 str r3, [r0, #0x04]
2223 RET
2224 LMEMCPY_8_PAD
2225
2226 /*
2227 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2228 */
2229 ldrb r3, [r1] /* r3 = ...0 */
2230 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2231 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2232 #ifdef __ARMEB__
2233 mov r3, r3, lsl #24 /* r3 = 0... */
2234 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2235 mov r2, r2, lsl #24 /* r2 = 4... */
2236 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2237 #else
2238 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2239 mov r2, r2, lsr #24 /* r2 = ...4 */
2240 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2241 #endif
2242 str r3, [r0]
2243 str r2, [r0, #0x04]
2244 RET
2245 LMEMCPY_8_PAD
2246
2247 /*
2248 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2249 */
2250 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2251 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2252 #ifdef __ARMEB__
2253 mov r1, r3, lsr #24 /* r1 = ...0 */
2254 strb r1, [r0]
2255 mov r1, r3, lsr #8 /* r1 = .012 */
2256 strb r2, [r0, #0x07]
2257 mov r3, r3, lsl #24 /* r3 = 3... */
2258 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2259 #else
2260 strb r3, [r0]
2261 mov r1, r2, lsr #24 /* r1 = ...7 */
2262 strb r1, [r0, #0x07]
2263 mov r1, r3, lsr #8 /* r1 = .321 */
2264 mov r3, r3, lsr #24 /* r3 = ...3 */
2265 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2266 #endif
2267 strh r1, [r0, #0x01]
2268 str r3, [r0, #0x03]
2269 RET
2270 LMEMCPY_8_PAD
2271
2272 /*
2273 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2274 */
2275 ldrb r2, [r1]
2276 ldrh r3, [r1, #0x01]
2277 ldr ip, [r1, #0x03]
2278 ldrb r1, [r1, #0x07]
2279 strb r2, [r0]
2280 strh r3, [r0, #0x01]
2281 str ip, [r0, #0x03]
2282 strb r1, [r0, #0x07]
2283 RET
2284 LMEMCPY_8_PAD
2285
2286 /*
2287 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2288 */
2289 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2290 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2291 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2292 #ifdef __ARMEB__
2293 mov ip, r2, lsr #8 /* ip = ...0 */
2294 strb ip, [r0]
2295 mov ip, r2, lsl #8 /* ip = .01. */
2296 orr ip, ip, r3, lsr #24 /* ip = .012 */
2297 strb r1, [r0, #0x07]
2298 mov r3, r3, lsl #8 /* r3 = 345. */
2299 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2300 #else
2301 strb r2, [r0] /* 0 */
2302 mov ip, r1, lsr #8 /* ip = ...7 */
2303 strb ip, [r0, #0x07] /* 7 */
2304 mov ip, r2, lsr #8 /* ip = ...1 */
2305 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2306 mov r3, r3, lsr #8 /* r3 = .543 */
2307 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2308 #endif
2309 strh ip, [r0, #0x01]
2310 str r3, [r0, #0x03]
2311 RET
2312 LMEMCPY_8_PAD
2313
2314 /*
2315 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2316 */
2317 ldrb r3, [r1] /* r3 = ...0 */
2318 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2319 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2320 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2321 strb r3, [r0]
2322 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2323 #ifdef __ARMEB__
2324 strh r3, [r0, #0x01]
2325 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2326 #else
2327 strh ip, [r0, #0x01]
2328 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2329 #endif
2330 str r2, [r0, #0x03]
2331 strb r1, [r0, #0x07]
2332 RET
2333 LMEMCPY_8_PAD
2334
/*
 * NOTE(review): tail of the 8-byte-copy jump table (.Lmemcpy_8; the label
 * and entries 0000-0111 are above this chunk).  Each entry handles one
 * dst/src alignment combination; the 4-bit case number is
 * (dst & 3) << 2 | (src & 3).  Digits in the comments are SOURCE byte
 * indices (0 = first byte), so "r2 = 3210" means r2 holds bytes 3..0 with
 * byte 0 in the least-significant position (little-endian view).
 * Every entry must fit in 2^LMEMCPY_8_LOG2 bytes; LMEMCPY_8_PAD pads to
 * that boundary so the computed branch lands on the right entry.  Do not
 * add or remove instructions without rechecking the entry size.
 */
2335 /*
2336  * 1000: dst is 16-bit aligned, src is 32-bit aligned
2337  */
2338 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2339 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2340 	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2341 #ifdef __ARMEB__
2342 	strh	r1, [r0]
2343 	mov	r1, r3, lsr #16		/* r1 = ..45 */
2344 	orr	r2, r1 ,r2, lsl #16	/* r2 = 2345 */
2345 #else
2346 	strh	r2, [r0]
2347 	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
2348 	mov	r3, r3, lsr #16		/* r3 = ..76 */
2349 #endif
2350 	str	r2, [r0, #0x02]
2351 	strh	r3, [r0, #0x06]
2352 	RET
2353 	LMEMCPY_8_PAD
2354 
2355 /*
2356  * 1001: dst is 16-bit aligned, src is 8-bit aligned
2357  */
2358 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2359 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2360 	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
2361 	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
2362 	strh	r1, [r0]
2363 #ifdef __ARMEB__
2364 	mov	r1, r2, lsl #24		/* r1 = 2... */
2365 	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
2366 	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
2367 #else
2368 	mov	r1, r2, lsr #24		/* r1 = ...2 */
2369 	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
2370 	mov	r3, r3, lsr #24		/* r3 = ...6 */
2371 	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
2372 #endif
2373 	str	r1, [r0, #0x02]
2374 	strh	r3, [r0, #0x06]
2375 	RET
2376 	LMEMCPY_8_PAD
2377 
2378 /*
2379  * 1010: dst is 16-bit aligned, src is 16-bit aligned
2380  */
2381 	ldrh	r2, [r1]
2382 	ldr	ip, [r1, #0x02]
2383 	ldrh	r3, [r1, #0x06]
2384 	strh	r2, [r0]
2385 	str	ip, [r0, #0x02]
2386 	strh	r3, [r0, #0x06]
2387 	RET
2388 	LMEMCPY_8_PAD
2389 
2390 /*
2391  * 1011: dst is 16-bit aligned, src is 8-bit aligned
2392  */
2393 	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
2394 	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2395 	ldrb	ip, [r1]		/* ip = ...0 */
2396 	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
2397 	strh	r1, [r0, #0x06]
2398 #ifdef __ARMEB__
2399 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2400 	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
2401 	mov	r2, r2, lsr #24		/* r2 = ...1 */
2402 	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
2403 #else
2404 	mov	r3, r3, lsl #24		/* r3 = 5... */
2405 	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
2406 	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
2407 #endif
2408 	str	r3, [r0, #0x02]
2409 	strh	r2, [r0]
2410 	RET
2411 	LMEMCPY_8_PAD
2412 
2413 /*
2414  * 1100: dst is 8-bit aligned, src is 32-bit aligned
2415  */
2416 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2417 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2418 	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
2419 	strh	r1, [r0, #0x05]
2420 #ifdef __ARMEB__
2421 	strb	r3, [r0, #0x07]
2422 	mov	r1, r2, lsr #24		/* r1 = ...0 */
2423 	strb	r1, [r0]
2424 	mov	r2, r2, lsl #8		/* r2 = 123. */
2425 	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
2426 	str	r2, [r0, #0x01]
2427 #else
2428 	strb	r2, [r0]
2429 	mov	r1, r3, lsr #24		/* r1 = ...7 */
2430 	strb	r1, [r0, #0x07]
2431 	mov	r2, r2, lsr #8		/* r2 = .321 */
2432 	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
2433 	str	r2, [r0, #0x01]
2434 #endif
2435 	RET
2436 	LMEMCPY_8_PAD
2437 
2438 /*
2439  * 1101: dst is 8-bit aligned, src is 8-bit aligned
2440  */
2441 	ldrb	r3, [r1]		/* r3 = ...0 */
2442 	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
2443 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2444 	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2445 	strb	r3, [r0]
2446 	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
2447 #ifdef __ARMEB__
2448 	strh	ip, [r0, #0x05]
2449 	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
2450 #else
2451 	strh	r3, [r0, #0x05]
2452 	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
2453 #endif
2454 	str	r2, [r0, #0x01]
2455 	strb	r1, [r0, #0x07]
2456 	RET
2457 	LMEMCPY_8_PAD
2458 
2459 /*
2460  * 1110: dst is 8-bit aligned, src is 16-bit aligned
2461  */
2462 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2463 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2464 	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2465 #ifdef __ARMEB__
2466 	mov	ip, r2, lsr #8		/* ip = ...0 */
2467 	strb	ip, [r0]
2468 	mov	ip, r2, lsl #24		/* ip = 1... */
2469 	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
2470 	strb	r1, [r0, #0x07]
2471 	mov	r1, r1, lsr #8		/* r1 = ...6 */
2472 	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
2473 #else
2474 	strb	r2, [r0]
2475 	mov	ip, r2, lsr #8		/* ip = ...1 */
2476 	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2477 	mov	r2, r1, lsr #8		/* r2 = ...7 */
2478 	strb	r2, [r0, #0x07]
2479 	mov	r1, r1, lsl #8		/* r1 = .76. */
2480 	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
2481 #endif
2482 	str	ip, [r0, #0x01]
2483 	strh	r1, [r0, #0x05]
2484 	RET
2485 	LMEMCPY_8_PAD
2486 
2487 /*
2488  * 1111: dst is 8-bit aligned, src is 8-bit aligned
2489  */
2490 	ldrb	r2, [r1]
2491 	ldr	ip, [r1, #0x01]
2492 	ldrh	r3, [r1, #0x05]
2493 	ldrb	r1, [r1, #0x07]
2494 	strb	r2, [r0]
2495 	str	ip, [r0, #0x01]
2496 	strh	r3, [r0, #0x05]
2497 	strb	r1, [r0, #0x07]
2498 	RET
2499 	LMEMCPY_8_PAD
2500 
2500
2501 /******************************************************************************
2502  * Special case for 12 byte copies
2503  */
2504 #define LMEMCPY_C_LOG2	7	/* 128 bytes */
2505 #define LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
2506 LMEMCPY_C_PAD
2507 .Lmemcpy_c:
/*
 * Dispatch on the dst/src low address bits:
 *   case = ((dst & 3) << 2) | (src & 3)
 * Each jump-table entry below is padded to exactly 2^LMEMCPY_C_LOG2
 * (128) bytes, so the target is base + (case << LMEMCPY_C_LOG2).
 * NOTE(review): the #0x14 constant encodes the distance from the pc
 * value read here (this instruction + 8 on ARM) back to the table
 * base; it is layout-sensitive, so do not insert or remove
 * instructions in this dispatch sequence.
 */
2508 	and	r2, r1, #0x03		/* r2 = src & 3 */
2509 	orr	r2, r2, r0, lsl #2	/* fold in dst bits (masked next) */
2510 	ands	r2, r2, #0x0f		/* r2 = case; Z set for case 0 */
2511 	sub	r3, pc, #0x14		/* r3 = jump-table base */
2512 	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* case 0 falls through */
2513 
2513
/*
 * NOTE(review): 12-byte-copy jump table.  Digits in the comments are
 * SOURCE byte indices 0-B (hex), so "r2 = 3210" means r2 holds source
 * bytes 3..0 with byte 0 least significant (little-endian view).
 * Each entry must fit in 2^LMEMCPY_C_LOG2 bytes; LMEMCPY_C_PAD pads to
 * that boundary so the computed branch in .Lmemcpy_c lands correctly.
 * Do not add or remove instructions without rechecking entry sizes.
 */
2514 /*
2515  * 0000: dst is 32-bit aligned, src is 32-bit aligned
2516  */
2517 	ldr	r2, [r1]
2518 	ldr	r3, [r1, #0x04]
2519 	ldr	r1, [r1, #0x08]
2520 	str	r2, [r0]
2521 	str	r3, [r0, #0x04]
2522 	str	r1, [r0, #0x08]
2523 	RET
2524 	LMEMCPY_C_PAD
2525 
2526 /*
2527  * 0001: dst is 32-bit aligned, src is 8-bit aligned
2528  */
2529 	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
2530 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2531 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2532 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2533 #ifdef __ARMEB__
2534 	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
2535 	str	r2, [r0, #0x08]
2536 	mov	r2, ip, lsr #24		/* r2 = ...7 */
2537 	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
2538 	mov	r1, r1, lsl #8		/* r1 = 012. */
2539 	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
2540 #else
2541 	mov	r2, r2, lsl #24		/* r2 = B... */
2542 	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
2543 	str	r2, [r0, #0x08]
2544 	mov	r2, ip, lsl #24		/* r2 = 7... */
2545 	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
2546 	mov	r1, r1, lsr #8		/* r1 = .210 */
2547 	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
2548 #endif
2549 	str	r2, [r0, #0x04]
2550 	str	r1, [r0]
2551 	RET
2552 	LMEMCPY_C_PAD
2553 
2554 /*
2555  * 0010: dst is 32-bit aligned, src is 16-bit aligned
2556  */
2557 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2558 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2559 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2560 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2561 #ifdef __ARMEB__
2562 	mov	r2, r2, lsl #16		/* r2 = 01.. */
2563 	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2564 	str	r2, [r0]
2565 	mov	r3, r3, lsl #16		/* r3 = 45.. */
2566 	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
2567 	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
2568 #else
2569 	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2570 	str	r2, [r0]
2571 	mov	r3, r3, lsr #16		/* r3 = ..54 */
2572 	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
2573 	mov	r1, r1, lsl #16		/* r1 = BA.. */
2574 	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
2575 #endif
2576 	str	r3, [r0, #0x04]
2577 	str	r1, [r0, #0x08]
2578 	RET
2579 	LMEMCPY_C_PAD
2580 
2581 /*
2582  * 0011: dst is 32-bit aligned, src is 8-bit aligned
2583  */
2584 	ldrb	r2, [r1]		/* r2 = ...0 */
2585 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2586 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2587 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2588 #ifdef __ARMEB__
2589 	mov	r2, r2, lsl #24		/* r2 = 0... */
2590 	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
2591 	str	r2, [r0]
2592 	mov	r3, r3, lsl #24		/* r3 = 4... */
2593 	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
2594 	mov	r1, r1, lsr #8		/* r1 = .9AB */
2595 	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
2596 #else
2597 	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
2598 	str	r2, [r0]
2599 	mov	r3, r3, lsr #24		/* r3 = ...4 */
2600 	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
2601 	mov	r1, r1, lsl #8		/* r1 = BA9. */
2602 	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
2603 #endif
2604 	str	r3, [r0, #0x04]
2605 	str	r1, [r0, #0x08]
2606 	RET
2607 	LMEMCPY_C_PAD
2608 
2609 /*
2610  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2611  */
2612 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2613 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2614 	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
2615 	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
2616 	strh	r1, [r0, #0x01]
2617 #ifdef __ARMEB__
2618 	mov	r1, r2, lsr #24		/* r1 = ...0 */
2619 	strb	r1, [r0]
2620 	mov	r1, r2, lsl #24		/* r1 = 3... */
2621 	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
2622 	mov	r1, r3, lsl #24		/* r1 = 7... */
2623 	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
2624 #else
2625 	strb	r2, [r0]
2626 	mov	r1, r2, lsr #24		/* r1 = ...3 */
2627 	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
2628 	mov	r1, r3, lsr #24		/* r1 = ...7 */
2629 	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
2630 	mov	ip, ip, lsr #24		/* ip = ...B */
2631 #endif
2632 	str	r2, [r0, #0x03]
2633 	str	r1, [r0, #0x07]
2634 	strb	ip, [r0, #0x0b]
2635 	RET
2636 	LMEMCPY_C_PAD
2637 
2638 /*
2639  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2640  */
2641 	ldrb	r2, [r1]
2642 	ldrh	r3, [r1, #0x01]
2643 	ldr	ip, [r1, #0x03]
2644 	strb	r2, [r0]
2645 	ldr	r2, [r1, #0x07]
2646 	ldrb	r1, [r1, #0x0b]
2647 	strh	r3, [r0, #0x01]
2648 	str	ip, [r0, #0x03]
2649 	str	r2, [r0, #0x07]
2650 	strb	r1, [r0, #0x0b]
2651 	RET
2652 	LMEMCPY_C_PAD
2653 
2654 /*
2655  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2656  */
2657 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2658 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2659 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2660 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2661 #ifdef __ARMEB__
2662 	mov	r2, r2, ror #8		/* r2 = 1..0 */
2663 	strb	r2, [r0]
2664 	mov	r2, r2, lsr #16		/* r2 = ..1. */
2665 	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
2666 	strh	r2, [r0, #0x01]
2667 	mov	r2, r3, lsl #8		/* r2 = 345. */
2668 	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
2669 	mov	r2, ip, lsl #8		/* r2 = 789. */
2670 	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
2671 #else
2672 	strb	r2, [r0]
2673 	mov	r2, r2, lsr #8		/* r2 = ...1 */
2674 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2675 	strh	r2, [r0, #0x01]
2676 	mov	r2, r3, lsr #8		/* r2 = .543 */
2677 	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
2678 	mov	r2, ip, lsr #8		/* r2 = .987 */
2679 	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
2680 	mov	r1, r1, lsr #8		/* r1 = ...B */
2681 #endif
2682 	str	r3, [r0, #0x03]
2683 	str	r2, [r0, #0x07]
2684 	strb	r1, [r0, #0x0b]
2685 	RET
2686 	LMEMCPY_C_PAD
2687 
2688 /*
2689  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2690  */
2691 	ldrb	r2, [r1]
2692 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2693 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2694 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2695 	strb	r2, [r0]
2696 #ifdef __ARMEB__
2697 	mov	r2, r3, lsr #16		/* r2 = ..12 */
2698 	strh	r2, [r0, #0x01]
2699 	mov	r3, r3, lsl #16		/* r3 = 34.. */
2700 	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
2701 	mov	ip, ip, lsl #16		/* ip = 78.. */
2702 	orr	ip, ip, r1, lsr #16	/* ip = 789A */
2703 	mov	r1, r1, lsr #8		/* r1 = .9AB */
2704 #else
2705 	strh	r3, [r0, #0x01]
2706 	mov	r3, r3, lsr #16		/* r3 = ..43 */
2707 	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
2708 	mov	ip, ip, lsr #16		/* ip = ..87 */
2709 	orr	ip, ip, r1, lsl #16	/* ip = A987 */
2710 	mov	r1, r1, lsr #16		/* r1 = ..xB */
2711 #endif
2712 	str	r3, [r0, #0x03]
2713 	str	ip, [r0, #0x07]
2714 	strb	r1, [r0, #0x0b]
2715 	RET
2716 	LMEMCPY_C_PAD
2717 
2718 /*
2719  * 1000: dst is 16-bit aligned, src is 32-bit aligned
2720  */
2721 	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
2722 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2723 	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
2724 	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2725 #ifdef __ARMEB__
2726 	strh	r1, [r0]
2727 	mov	r1, ip, lsl #16		/* r1 = 23.. */
2728 	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
2729 	mov	r3, r3, lsl #16		/* r3 = 67.. */
2730 	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
2731 #else
2732 	strh	ip, [r0]
2733 	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
2734 	mov	r3, r3, lsr #16		/* r3 = ..76 */
2735 	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
2736 	mov	r2, r2, lsr #16		/* r2 = ..BA */
2737 #endif
2738 	str	r1, [r0, #0x02]
2739 	str	r3, [r0, #0x06]
2740 	strh	r2, [r0, #0x0a]
2741 	RET
2742 	LMEMCPY_C_PAD
2743 
2744 /*
2745  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2746  */
2747 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2748 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2749 	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
2750 	strh	ip, [r0]
2751 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2752 	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
2753 #ifdef __ARMEB__
2754 	mov	r2, r2, lsl #24		/* r2 = 2... */
2755 	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
2756 	mov	r3, r3, lsl #24		/* r3 = 6... */
2757 	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
2758 	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
2759 #else
2760 	mov	r2, r2, lsr #24		/* r2 = ...2 */
2761 	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
2762 	mov	r3, r3, lsr #24		/* r3 = ...6 */
2763 	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
2764 	mov	r1, r1, lsl #8		/* r1 = ..B. */
2765 	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
2766 #endif
2767 	str	r2, [r0, #0x02]
2768 	str	r3, [r0, #0x06]
2769 	strh	r1, [r0, #0x0a]
2770 	RET
2771 	LMEMCPY_C_PAD
2772 
2773 /*
2774  * 1010: dst is 16-bit aligned, src is 16-bit aligned
2775  */
2776 	ldrh	r2, [r1]
2777 	ldr	r3, [r1, #0x02]
2778 	ldr	ip, [r1, #0x06]
2779 	ldrh	r1, [r1, #0x0a]
2780 	strh	r2, [r0]
2781 	str	r3, [r0, #0x02]
2782 	str	ip, [r0, #0x06]
2783 	strh	r1, [r0, #0x0a]
2784 	RET
2785 	LMEMCPY_C_PAD
2786 
2787 /*
2788  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2789  */
2790 	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
2791 	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
2792 	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
2793 	strh	ip, [r0, #0x0a]
2794 	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2795 	ldrb	r1, [r1]		/* r1 = ...0 */
2796 #ifdef __ARMEB__
2797 	mov	r2, r2, lsr #24		/* r2 = ...9 */
2798 	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
2799 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2800 	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
2801 	mov	r1, r1, lsl #8		/* r1 = ..0. */
2802 	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
2803 #else
2804 	mov	r2, r2, lsl #24		/* r2 = 9... */
2805 	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
2806 	mov	r3, r3, lsl #24		/* r3 = 5... */
2807 	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
2808 	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
2809 #endif
2810 	str	r2, [r0, #0x06]
2811 	str	r3, [r0, #0x02]
2812 	strh	r1, [r0]
2813 	RET
2814 	LMEMCPY_C_PAD
2815 
2816 /*
2817  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2818  */
2819 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2820 	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
2821 	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
2822 #ifdef __ARMEB__
2823 	mov	r3, r2, lsr #24		/* r3 = ...0 */
2824 	strb	r3, [r0]
2825 	mov	r2, r2, lsl #8		/* r2 = 123. */
2826 	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
2827 	str	r2, [r0, #0x01]
2828 	mov	r2, ip, lsl #8		/* r2 = 567. */
2829 	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
2830 	str	r2, [r0, #0x05]
2831 	mov	r2, r1, lsr #8		/* r2 = ..9A */
2832 	strh	r2, [r0, #0x09]
2833 	strb	r1, [r0, #0x0b]
2834 #else
2835 	strb	r2, [r0]
2836 	mov	r3, r2, lsr #8		/* r3 = .321 */
2837 	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
2838 	str	r3, [r0, #0x01]
2839 	mov	r3, ip, lsr #8		/* r3 = .765 */
2840 	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
2841 	str	r3, [r0, #0x05]
2842 	mov	r1, r1, lsr #8		/* r1 = .BA9 */
2843 	strh	r1, [r0, #0x09]
2844 	mov	r1, r1, lsr #16		/* r1 = ...B */
2845 	strb	r1, [r0, #0x0b]
2846 #endif
2847 	RET
2848 	LMEMCPY_C_PAD
2849 
2850 /*
2851  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2852  */
2853 	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
2854 	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
2855 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2856 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2857 	strb	r2, [r0, #0x0b]
2858 #ifdef __ARMEB__
2859 	strh	r3, [r0, #0x09]
2860 	mov	r3, r3, lsr #16		/* r3 = ..78 */
2861 	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
2862 	mov	ip, ip, lsr #16		/* ip = ..34 */
2863 	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
2864 	mov	r1, r1, lsr #16		/* r1 = ..x0 */
2865 #else
2866 	mov	r2, r3, lsr #16		/* r2 = ..A9 */
2867 	strh	r2, [r0, #0x09]
2868 	mov	r3, r3, lsl #16		/* r3 = 87.. */
2869 	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
2870 	mov	ip, ip, lsl #16		/* ip = 43.. */
2871 	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
2872 	mov	r1, r1, lsr #8		/* r1 = .210 */
2873 #endif
2874 	str	r3, [r0, #0x05]
2875 	str	ip, [r0, #0x01]
2876 	strb	r1, [r0]
2877 	RET
2878 	LMEMCPY_C_PAD
2879 
2880 /*
2881  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2882  */
2883 #ifdef __ARMEB__
2884 	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
2885 	ldr	ip, [r1, #0x06]		/* ip = 6789 */
2886 	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
2887 	ldrh	r1, [r1]		/* r1 = ..01 */
2888 	strb	r2, [r0, #0x0b]
2889 	mov	r2, r2, lsr #8		/* r2 = ...A */
2890 	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
2891 	mov	ip, ip, lsr #8		/* ip = .678 */
2892 	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
2893 	mov	r3, r3, lsr #8		/* r3 = .234 */
2894 	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
2895 	mov	r1, r1, lsr #8		/* r1 = ...0 */
2896 	strb	r1, [r0]
2897 	str	r3, [r0, #0x01]
2898 	str	ip, [r0, #0x05]
2899 	strh	r2, [r0, #0x09]
2900 #else
2901 	ldrh	r2, [r1]		/* r2 = ..10 */
2902 	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
2903 	ldr	ip, [r1, #0x06]		/* ip = 9876 */
2904 	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
2905 	strb	r2, [r0]
2906 	mov	r2, r2, lsr #8		/* r2 = ...1 */
2907 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2908 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2909 	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
2910 	mov	ip, ip, lsr #24		/* ip = ...9 */
2911 	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
2912 	mov	r1, r1, lsr #8		/* r1 = ...B */
2913 	str	r2, [r0, #0x01]
2914 	str	r3, [r0, #0x05]
2915 	strh	ip, [r0, #0x09]
2916 	strb	r1, [r0, #0x0b]
2917 #endif
2918 	RET
2919 	LMEMCPY_C_PAD
2920 
2921 /*
2922  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2923  */
2924 	ldrb	r2, [r1]
2925 	ldr	r3, [r1, #0x01]
2926 	ldr	ip, [r1, #0x05]
2927 	strb	r2, [r0]
2928 	ldrh	r2, [r1, #0x09]
2929 	ldrb	r1, [r1, #0x0b]
2930 	str	r3, [r0, #0x01]
2931 	str	ip, [r0, #0x05]
2932 	strh	r2, [r0, #0x09]
2933 	strb	r1, [r0, #0x0b]
2934 	RET
2935 #endif /* _ARM_ARCH_5E */
2936
2937 #ifdef GPROF
2938 
/*
 * NOTE(review): one-nop placeholder entry points, built only with
 * profiling (GPROF) enabled.  Only the symbol addresses appear to
 * matter here; nothing visible in this file calls them.  They look
 * like the traditional kernel-profiling marker symbols
 * (user/btrap/etrap/bintr/eintr) used to bucket profiler samples that
 * fall outside normal functions -- confirm against the kernel's gprof
 * support code before relying on that interpretation.
 */
2939 ENTRY(user)
2940 	nop
2941 ENTRY(btrap)
2942 	nop
2943 ENTRY(etrap)
2944 	nop
2945 ENTRY(bintr)
2946 	nop
2947 ENTRY(eintr)
2948 	nop
2949 
2950 #endif
/* Cache object: c57a09b203bf749691ffe98d08926ecc */