FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 __FBSDID("$FreeBSD: releng/10.2/sys/arm/arm/support.S 275767 2014-12-14 16:28:53Z andrew $");
91
92 #include "assym.s"
93
94 .syntax unified
95
/*
 * Literal pool: each .word holds the ADDRESS of a kernel variable.
 * _arm_memcpy/_arm_bzero are optional platform-provided accelerated
 * routines (a NULL function pointer means "no hook installed" -- see the
 * cmp #0 checks below); _min_memcpy_size/_min_bzero_size are the minimum
 * byte counts for which using the hook is worthwhile.
 */
96 .L_arm_memcpy:
97 .word _C_LABEL(_arm_memcpy)
98 .L_arm_bzero:
99 .word _C_LABEL(_arm_bzero)
100 .L_min_memcpy_size:
101 .word _C_LABEL(_min_memcpy_size)
102 .L_min_bzero_size:
103 .word _C_LABEL(_min_bzero_size)
104 /*
105 * memset: Sets a block of memory to the specified value
106 *
107 * On entry:
108 * r0 - dest address
109 * r1 - byte to write
110 * r2 - number of bytes to write
111 *
112 * On exit:
113 * r0 - dest address
114 */
115 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * void bzero(void *dst /* r0 */, size_t len /* r1 */)
 *
 * If a platform bzero hook is installed (non-NULL *_arm_bzero) and the
 * length is at least *_min_bzero_size, try the hook first; the hook
 * returns 0 in r0 on success.  Otherwise fall through to the shared
 * memset tail (do_memset) with a fill byte of zero in r3.
 */
116 ENTRY(bzero)
117 ldr r3, .L_arm_bzero
118 ldr r3, [r3]
119 cmp r3, #0
120 beq .Lnormal0
121 ldr r2, .L_min_bzero_size
122 ldr r2, [r2]
123 cmp r1, r2
124 blt .Lnormal0
/* Call the hook: r0 = dst, r1 = len, r2 = 0 (flags arg, presumably --
 * TODO confirm against _arm_bzero's prototype). */
125 stmfd sp!, {r0, r1, lr}
126 mov r2, #0
127 mov lr, pc
/* Old-style indirect call; the Z flag set by the cmp below survives the
 * ldmfd (LDM does not alter flags), so RETeq returns iff the hook
 * reported success (returned 0). */
128 mov pc, r3
129 cmp r0, #0
130 ldmfd sp!, {r0, r1, lr}
131 RETeq
132 .Lnormal0:
/* Hook absent/declined: fill value 0, join the common memset path. */
133 mov r3, #0x00
134 b do_memset
135 END(bzero)
136 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * void *memset(void *dst /* r0 */, int c /* r1 */, size_t len /* r2 */)
 *
 * At do_memset (shared with bzero): r0 = dst (preserved for return),
 * r3 = fill byte, r1 = byte count, ip = working dest pointer.
 * Strategy: byte-fill to word alignment, replicate the byte across a
 * word (and a register pair), then store in descending chunk sizes
 * (128/32/16/4 bytes, then a byte tail).  On _ARM_ARCH_5E the pointer
 * is additionally 8-byte aligned so strd can be used.
 */
137 ENTRY(memset)
138 and r3, r1, #0xff /* We deal with bytes */
139 mov r1, r2
140 do_memset:
141 cmp r1, #0x04 /* Do we have less than 4 bytes */
142 mov ip, r0
143 blt .Lmemset_lessthanfour
144
145 /* Ok first we will word align the address */
146 ands r2, ip, #0x03 /* Get the bottom two bits */
147 bne .Lmemset_wordunaligned /* The address is not word aligned */
148
149 /* We are now word aligned */
150 .Lmemset_wordaligned:
151 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
152 #ifdef _ARM_ARCH_5E
153 tst ip, #0x04 /* Quad-align for armv5e */
154 #else
155 cmp r1, #0x10
156 #endif
157 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
158 #ifdef _ARM_ARCH_5E
159 subne r1, r1, #0x04 /* Quad-align if necessary */
160 strne r3, [ip], #0x04
161 cmp r1, #0x10
162 #endif
163 blt .Lmemset_loop4 /* If less than 16 then use words */
164 mov r2, r3 /* Duplicate data */
165 cmp r1, #0x80 /* If < 128 then skip the big loop */
166 blt .Lmemset_loop32
167
168 /* Do 128 bytes at a time */
169 .Lmemset_loop128:
170 subs r1, r1, #0x80
171 #ifdef _ARM_ARCH_5E
172 strdge r2, [ip], #0x08
173 strdge r2, [ip], #0x08
174 strdge r2, [ip], #0x08
175 strdge r2, [ip], #0x08
176 strdge r2, [ip], #0x08
177 strdge r2, [ip], #0x08
178 strdge r2, [ip], #0x08
179 strdge r2, [ip], #0x08
180 strdge r2, [ip], #0x08
181 strdge r2, [ip], #0x08
182 strdge r2, [ip], #0x08
183 strdge r2, [ip], #0x08
184 strdge r2, [ip], #0x08
185 strdge r2, [ip], #0x08
186 strdge r2, [ip], #0x08
187 strdge r2, [ip], #0x08
188 #else
189 stmiage ip!, {r2-r3}
190 stmiage ip!, {r2-r3}
191 stmiage ip!, {r2-r3}
192 stmiage ip!, {r2-r3}
193 stmiage ip!, {r2-r3}
194 stmiage ip!, {r2-r3}
195 stmiage ip!, {r2-r3}
196 stmiage ip!, {r2-r3}
197 stmiage ip!, {r2-r3}
198 stmiage ip!, {r2-r3}
199 stmiage ip!, {r2-r3}
200 stmiage ip!, {r2-r3}
201 stmiage ip!, {r2-r3}
202 stmiage ip!, {r2-r3}
203 stmiage ip!, {r2-r3}
204 stmiage ip!, {r2-r3}
205 #endif
206 bgt .Lmemset_loop128
207 RETeq /* Zero length so just exit */
208
209 add r1, r1, #0x80 /* Adjust for extra sub */
210
211 /* Do 32 bytes at a time */
212 .Lmemset_loop32:
213 subs r1, r1, #0x20
214 #ifdef _ARM_ARCH_5E
215 strdge r2, [ip], #0x08
216 strdge r2, [ip], #0x08
217 strdge r2, [ip], #0x08
218 strdge r2, [ip], #0x08
219 #else
220 stmiage ip!, {r2-r3}
221 stmiage ip!, {r2-r3}
222 stmiage ip!, {r2-r3}
223 stmiage ip!, {r2-r3}
224 #endif
225 bgt .Lmemset_loop32
226 RETeq /* Zero length so just exit */
227
228 adds r1, r1, #0x10 /* Partially adjust for extra sub */
229
230 /* Deal with 16 bytes or more */
231 #ifdef _ARM_ARCH_5E
232 strdge r2, [ip], #0x08
233 strdge r2, [ip], #0x08
234 #else
235 stmiage ip!, {r2-r3}
236 stmiage ip!, {r2-r3}
237 #endif
238 RETeq /* Zero length so just exit */
239
240 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
241
242 /* We have at least 4 bytes so copy as words */
243 .Lmemset_loop4:
244 subs r1, r1, #0x04
245 strge r3, [ip], #0x04
246 bgt .Lmemset_loop4
247 RETeq /* Zero length so just exit */
248
249 #ifdef _ARM_ARCH_5E
250 /* Compensate for 64-bit alignment check */
251 adds r1, r1, #0x04
252 RETeq
253 cmp r1, #2
254 #else
255 cmp r1, #-2
256 #endif
257
/* 1-3 trailing bytes: flags from the cmp above select how many. */
258 strb r3, [ip], #0x01 /* Set 1 byte */
259 strbge r3, [ip], #0x01 /* Set another byte */
260 strbgt r3, [ip] /* and a third */
261 RET /* Exit */
262
/* Dest not word aligned: r2 = misalignment (1-3); store 4-r2 bytes to
 * reach a word boundary, then re-enter the aligned path if >= 4 remain. */
263 .Lmemset_wordunaligned:
264 rsb r2, r2, #0x004
265 strb r3, [ip], #0x01 /* Set 1 byte */
266 cmp r2, #0x02
267 strbge r3, [ip], #0x01 /* Set another byte */
268 sub r1, r1, r2
269 strbgt r3, [ip], #0x01 /* and a third */
270 cmp r1, #0x04 /* More than 4 bytes left? */
271 bge .Lmemset_wordaligned /* Yup */
272
273 .Lmemset_lessthanfour:
274 cmp r1, #0x00
275 RETeq /* Zero length so exit */
276 strb r3, [ip], #0x01 /* Set 1 byte */
277 cmp r1, #0x02
278 strbge r3, [ip], #0x01 /* Set another byte */
279 strbgt r3, [ip] /* and a third */
280 RET /* Exit */
281 EEND(memset)
282 END(bzero)
283
/*
 * int bcmp(const void *b1 /* r0 */, const void *b2 /* r2: len; r1 */, size_t len)
 *
 * In:  r0 = b1, r1 = b2, r2 = len.  Out: r0 = 0 if equal, else the
 * (signed) difference of the first mismatching bytes, memcmp-style.
 * b1 is moved into ip so r0 can accumulate the result.  len == 6 gets a
 * dedicated hand-scheduled path (common for MAC-address compares).
 */
284 ENTRY(bcmp)
285 mov ip, r0
286 cmp r2, #0x06
287 beq .Lmemcmp_6bytes
288 mov r0, #0x00
289
290 /* Are both addresses aligned the same way? */
291 cmp r2, #0x00
292 eorsne r3, ip, r1
293 RETeq /* len == 0, or same addresses! */
294 tst r3, #0x03
295 subne r2, r2, #0x01
296 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
297
298 /* Word-align the addresses, if necessary */
/*
 * Computed jump: r3 = 6 * ((b2 - 5) & 3), then pc += r3 * 8 when
 * nonzero.  Each "compare up to N bytes" stanza below is exactly six
 * 4-byte instructions, so the scaled offset (plus the implicit pc+8
 * prefetch, absorbed by the nop) lands on the right stanza.  Do NOT
 * insert or remove instructions in the stanzas below.
 */
299 sub r3, r1, #0x05
300 ands r3, r3, #0x03
301 add r3, r3, r3, lsl #1
302 addne pc, pc, r3, lsl #3
303 nop
304
305 /* Compare up to 3 bytes */
306 ldrb r0, [ip], #0x01
307 ldrb r3, [r1], #0x01
308 subs r0, r0, r3
309 RETne
310 subs r2, r2, #0x01
311 RETeq
312
313 /* Compare up to 2 bytes */
314 ldrb r0, [ip], #0x01
315 ldrb r3, [r1], #0x01
316 subs r0, r0, r3
317 RETne
318 subs r2, r2, #0x01
319 RETeq
320
321 /* Compare 1 byte */
322 ldrb r0, [ip], #0x01
323 ldrb r3, [r1], #0x01
324 subs r0, r0, r3
325 RETne
326 subs r2, r2, #0x01
327 RETeq
328
329 /* Compare 4 bytes at a time, if possible */
330 subs r2, r2, #0x04
331 bcc .Lmemcmp_bytewise
332 .Lmemcmp_word_aligned:
333 ldr r0, [ip], #0x04
334 ldr r3, [r1], #0x04
335 subs r2, r2, #0x04
336 cmpcs r0, r3
337 beq .Lmemcmp_word_aligned
338 sub r0, r0, r3
339
340 /* Correct for extra subtraction, and check if done */
341 adds r2, r2, #0x04
342 cmpeq r0, #0x00 /* If done, did all bytes match? */
343 RETeq /* Yup. Just return */
344
345 /* Re-do the final word byte-wise */
346 sub ip, ip, #0x04
347 sub r1, r1, #0x04
348
349 .Lmemcmp_bytewise:
350 add r2, r2, #0x03
351 .Lmemcmp_bytewise2:
352 ldrb r0, [ip], #0x01
353 ldrb r3, [r1], #0x01
354 subs r2, r2, #0x01
355 cmpcs r0, r3
356 beq .Lmemcmp_bytewise2
357 sub r0, r0, r3
358 RET
359
360 /*
361 * 6 byte compares are very common, thanks to the network stack.
362 * This code is hand-scheduled to reduce the number of stalls for
363 * load results. Everything else being equal, this will be ~32%
364 * faster than a byte-wise memcmp.
365 */
366 .align 5
367 .Lmemcmp_6bytes:
368 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
369 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
370 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
371 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
372 ldrbeq r3, [ip, #0x01] /* r3 = b1#1 */
373 RETne /* Return if mismatch on #0 */
374 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
375 ldrbeq r3, [r1, #0x02] /* r3 = b2#2 */
376 ldrbeq r0, [ip, #0x02] /* r0 = b1#2 */
377 RETne /* Return if mismatch on #1 */
378 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
379 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
380 ldrbeq r3, [ip, #0x03] /* r3 = b1#3 */
381 RETne /* Return if mismatch on #2 */
382 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
383 ldrbeq r3, [r1, #0x04] /* r3 = b2#4 */
384 ldrbeq r0, [ip, #0x04] /* r0 = b1#4 */
385 RETne /* Return if mismatch on #3 */
386 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
387 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
388 ldrbeq r3, [ip, #0x05] /* r3 = b1#5 */
389 RETne /* Return if mismatch on #4 */
390 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
391 RET
392 END(bcmp)
393
/*
 * void bcopy(const void *src, void *dst, size_t len)
 * void *memmove(void *dst, const void *src, size_t len)
 *
 * bcopy(src, dst, len) has its first two arguments in the opposite
 * order from memmove(dst, src, len), so it swaps r0/r1 with the
 * classic three-eor trick and falls into memmove.
 *
 * memmove: if the buffers do not overlap (|dst - src| >= len), tail-call
 * memcpy.  Otherwise copy forwards when dst < src and backwards when
 * dst > src so the overlapping region is never clobbered before it is
 * read.  Both directions: word-copy in 32/12/4-byte chunks once both
 * pointers are word aligned; the .L*srcul* paths handle a source that
 * is misaligned relative to the destination by 1/2/3 bytes, stitching
 * words together with shift/orr pairs (byte order differs under
 * __ARMEB__).
 */
394 ENTRY(bcopy)
395 /* switch the source and destination registers */
396 eor r0, r1, r0
397 eor r1, r0, r1
398 eor r0, r1, r0
399 EENTRY(memmove)
400 /* Do the buffers overlap? */
401 cmp r0, r1
402 RETeq /* Bail now if src/dst are the same */
403 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
404 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
405 cmp r3, r2 /* if (r3 < len) we have an overlap */
406 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
407
408 /* Determine copy direction */
409 cmp r1, r0
410 bcc .Lmemmove_backwards
411
412 moveq r0, #0 /* Quick abort for len=0 */
413 RETeq
414
415 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
416 subs r2, r2, #4
417 blt .Lmemmove_fl4 /* less than 4 bytes */
418 ands r12, r0, #3
419 bne .Lmemmove_fdestul /* oh unaligned destination addr */
420 ands r12, r1, #3
421 bne .Lmemmove_fsrcul /* oh unaligned source addr */
422
423 .Lmemmove_ft8:
424 /* We have aligned source and destination */
425 subs r2, r2, #8
426 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
427 subs r2, r2, #0x14
428 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
429 stmdb sp!, {r4} /* borrow r4 */
430
431 /* blat 32 bytes at a time */
432 /* XXX for really big copies perhaps we should use more registers */
433 .Lmemmove_floop32:
434 ldmia r1!, {r3, r4, r12, lr}
435 stmia r0!, {r3, r4, r12, lr}
436 ldmia r1!, {r3, r4, r12, lr}
437 stmia r0!, {r3, r4, r12, lr}
438 subs r2, r2, #0x20
439 bge .Lmemmove_floop32
440
441 cmn r2, #0x10
442 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
443 stmiage r0!, {r3, r4, r12, lr}
444 subge r2, r2, #0x10
445 ldmia sp!, {r4} /* return r4 */
446
447 .Lmemmove_fl32:
448 adds r2, r2, #0x14
449
450 /* blat 12 bytes at a time */
451 .Lmemmove_floop12:
452 ldmiage r1!, {r3, r12, lr}
453 stmiage r0!, {r3, r12, lr}
454 subsge r2, r2, #0x0c
455 bge .Lmemmove_floop12
456
457 .Lmemmove_fl12:
458 adds r2, r2, #8
459 blt .Lmemmove_fl4
460
461 subs r2, r2, #4
462 ldrlt r3, [r1], #4
463 strlt r3, [r0], #4
464 ldmiage r1!, {r3, r12}
465 stmiage r0!, {r3, r12}
466 subge r2, r2, #4
467
468 .Lmemmove_fl4:
469 /* less than 4 bytes to go */
470 adds r2, r2, #4
471 ldmiaeq sp!, {r0, pc} /* done */
472
473 /* copy the crud byte at a time */
474 cmp r2, #2
475 ldrb r3, [r1], #1
476 strb r3, [r0], #1
477 ldrbge r3, [r1], #1
478 strbge r3, [r0], #1
479 ldrbgt r3, [r1], #1
480 strbgt r3, [r0], #1
481 ldmia sp!, {r0, pc}
482
483 /* erg - unaligned destination */
484 .Lmemmove_fdestul:
485 rsb r12, r12, #4
486 cmp r12, #2
487
488 /* align destination with byte copies */
489 ldrb r3, [r1], #1
490 strb r3, [r0], #1
491 ldrbge r3, [r1], #1
492 strbge r3, [r0], #1
493 ldrbgt r3, [r1], #1
494 strbgt r3, [r0], #1
495 subs r2, r2, r12
496 blt .Lmemmove_fl4 /* less the 4 bytes */
497
498 ands r12, r1, #3
499 beq .Lmemmove_ft8 /* we have an aligned source */
500
501 /* erg - unaligned source */
502 /* This is where it gets nasty ... */
503 .Lmemmove_fsrcul:
/* r12 = src misalignment (1-3).  Round src down to a word boundary,
 * pre-load the first word into lr, then dispatch on the shift amount. */
504 bic r1, r1, #3
505 ldr lr, [r1], #4
506 cmp r12, #2
507 bgt .Lmemmove_fsrcul3
508 beq .Lmemmove_fsrcul2
509 cmp r2, #0x0c
510 blt .Lmemmove_fsrcul1loop4
511 sub r2, r2, #0x0c
512 stmdb sp!, {r4, r5}
513
514 .Lmemmove_fsrcul1loop16:
515 #ifdef __ARMEB__
516 mov r3, lr, lsl #8
517 #else
518 mov r3, lr, lsr #8
519 #endif
520 ldmia r1!, {r4, r5, r12, lr}
521 #ifdef __ARMEB__
522 orr r3, r3, r4, lsr #24
523 mov r4, r4, lsl #8
524 orr r4, r4, r5, lsr #24
525 mov r5, r5, lsl #8
526 orr r5, r5, r12, lsr #24
527 mov r12, r12, lsl #8
528 orr r12, r12, lr, lsr #24
529 #else
530 orr r3, r3, r4, lsl #24
531 mov r4, r4, lsr #8
532 orr r4, r4, r5, lsl #24
533 mov r5, r5, lsr #8
534 orr r5, r5, r12, lsl #24
535 mov r12, r12, lsr #8
536 orr r12, r12, lr, lsl #24
537 #endif
538 stmia r0!, {r3-r5, r12}
539 subs r2, r2, #0x10
540 bge .Lmemmove_fsrcul1loop16
541 ldmia sp!, {r4, r5}
542 adds r2, r2, #0x0c
543 blt .Lmemmove_fsrcul1l4
544
545 .Lmemmove_fsrcul1loop4:
546 #ifdef __ARMEB__
547 mov r12, lr, lsl #8
548 #else
549 mov r12, lr, lsr #8
550 #endif
551 ldr lr, [r1], #4
552 #ifdef __ARMEB__
553 orr r12, r12, lr, lsr #24
554 #else
555 orr r12, r12, lr, lsl #24
556 #endif
557 str r12, [r0], #4
558 subs r2, r2, #4
559 bge .Lmemmove_fsrcul1loop4
560
561 .Lmemmove_fsrcul1l4:
562 sub r1, r1, #3
563 b .Lmemmove_fl4
564
565 .Lmemmove_fsrcul2:
566 cmp r2, #0x0c
567 blt .Lmemmove_fsrcul2loop4
568 sub r2, r2, #0x0c
569 stmdb sp!, {r4, r5}
570
571 .Lmemmove_fsrcul2loop16:
572 #ifdef __ARMEB__
573 mov r3, lr, lsl #16
574 #else
575 mov r3, lr, lsr #16
576 #endif
577 ldmia r1!, {r4, r5, r12, lr}
578 #ifdef __ARMEB__
579 orr r3, r3, r4, lsr #16
580 mov r4, r4, lsl #16
581 orr r4, r4, r5, lsr #16
582 mov r5, r5, lsl #16
583 orr r5, r5, r12, lsr #16
584 mov r12, r12, lsl #16
585 orr r12, r12, lr, lsr #16
586 #else
587 orr r3, r3, r4, lsl #16
588 mov r4, r4, lsr #16
589 orr r4, r4, r5, lsl #16
590 mov r5, r5, lsr #16
591 orr r5, r5, r12, lsl #16
592 mov r12, r12, lsr #16
593 orr r12, r12, lr, lsl #16
594 #endif
595 stmia r0!, {r3-r5, r12}
596 subs r2, r2, #0x10
597 bge .Lmemmove_fsrcul2loop16
598 ldmia sp!, {r4, r5}
599 adds r2, r2, #0x0c
600 blt .Lmemmove_fsrcul2l4
601
602 .Lmemmove_fsrcul2loop4:
603 #ifdef __ARMEB__
604 mov r12, lr, lsl #16
605 #else
606 mov r12, lr, lsr #16
607 #endif
608 ldr lr, [r1], #4
609 #ifdef __ARMEB__
610 orr r12, r12, lr, lsr #16
611 #else
612 orr r12, r12, lr, lsl #16
613 #endif
614 str r12, [r0], #4
615 subs r2, r2, #4
616 bge .Lmemmove_fsrcul2loop4
617
618 .Lmemmove_fsrcul2l4:
619 sub r1, r1, #2
620 b .Lmemmove_fl4
621
622 .Lmemmove_fsrcul3:
623 cmp r2, #0x0c
624 blt .Lmemmove_fsrcul3loop4
625 sub r2, r2, #0x0c
626 stmdb sp!, {r4, r5}
627
628 .Lmemmove_fsrcul3loop16:
629 #ifdef __ARMEB__
630 mov r3, lr, lsl #24
631 #else
632 mov r3, lr, lsr #24
633 #endif
634 ldmia r1!, {r4, r5, r12, lr}
635 #ifdef __ARMEB__
636 orr r3, r3, r4, lsr #8
637 mov r4, r4, lsl #24
638 orr r4, r4, r5, lsr #8
639 mov r5, r5, lsl #24
640 orr r5, r5, r12, lsr #8
641 mov r12, r12, lsl #24
642 orr r12, r12, lr, lsr #8
643 #else
644 orr r3, r3, r4, lsl #8
645 mov r4, r4, lsr #24
646 orr r4, r4, r5, lsl #8
647 mov r5, r5, lsr #24
648 orr r5, r5, r12, lsl #8
649 mov r12, r12, lsr #24
650 orr r12, r12, lr, lsl #8
651 #endif
652 stmia r0!, {r3-r5, r12}
653 subs r2, r2, #0x10
654 bge .Lmemmove_fsrcul3loop16
655 ldmia sp!, {r4, r5}
656 adds r2, r2, #0x0c
657 blt .Lmemmove_fsrcul3l4
658
659 .Lmemmove_fsrcul3loop4:
660 #ifdef __ARMEB__
661 mov r12, lr, lsl #24
662 #else
663 mov r12, lr, lsr #24
664 #endif
665 ldr lr, [r1], #4
666 #ifdef __ARMEB__
667 orr r12, r12, lr, lsr #8
668 #else
669 orr r12, r12, lr, lsl #8
670 #endif
671 str r12, [r0], #4
672 subs r2, r2, #4
673 bge .Lmemmove_fsrcul3loop4
674
675 .Lmemmove_fsrcul3l4:
676 sub r1, r1, #1
677 b .Lmemmove_fl4
678
/* Backward copy (dst > src, overlapping): start from the end of both
 * buffers and work down with descending-address loads/stores. */
679 .Lmemmove_backwards:
680 add r1, r1, r2
681 add r0, r0, r2
682 subs r2, r2, #4
683 blt .Lmemmove_bl4 /* less than 4 bytes */
684 ands r12, r0, #3
685 bne .Lmemmove_bdestul /* oh unaligned destination addr */
686 ands r12, r1, #3
687 bne .Lmemmove_bsrcul /* oh unaligned source addr */
688
689 .Lmemmove_bt8:
690 /* We have aligned source and destination */
691 subs r2, r2, #8
692 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
693 stmdb sp!, {r4, lr}
694 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
695 blt .Lmemmove_bl32
696
697 /* blat 32 bytes at a time */
698 /* XXX for really big copies perhaps we should use more registers */
699 .Lmemmove_bloop32:
700 ldmdb r1!, {r3, r4, r12, lr}
701 stmdb r0!, {r3, r4, r12, lr}
702 ldmdb r1!, {r3, r4, r12, lr}
703 stmdb r0!, {r3, r4, r12, lr}
704 subs r2, r2, #0x20
705 bge .Lmemmove_bloop32
706
707 .Lmemmove_bl32:
708 cmn r2, #0x10
709 ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
710 stmdbge r0!, {r3, r4, r12, lr}
711 subge r2, r2, #0x10
712 adds r2, r2, #0x14
713 ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
714 stmdbge r0!, {r3, r12, lr}
715 subge r2, r2, #0x0c
716 ldmia sp!, {r4, lr}
717
718 .Lmemmove_bl12:
719 adds r2, r2, #8
720 blt .Lmemmove_bl4
721 subs r2, r2, #4
722 ldrlt r3, [r1, #-4]!
723 strlt r3, [r0, #-4]!
724 ldmdbge r1!, {r3, r12}
725 stmdbge r0!, {r3, r12}
726 subge r2, r2, #4
727
728 .Lmemmove_bl4:
729 /* less than 4 bytes to go */
730 adds r2, r2, #4
731 RETeq /* done */
732
733 /* copy the crud byte at a time */
734 cmp r2, #2
735 ldrb r3, [r1, #-1]!
736 strb r3, [r0, #-1]!
737 ldrbge r3, [r1, #-1]!
738 strbge r3, [r0, #-1]!
739 ldrbgt r3, [r1, #-1]!
740 strbgt r3, [r0, #-1]!
741 RET
742
743 /* erg - unaligned destination */
744 .Lmemmove_bdestul:
745 cmp r12, #2
746
747 /* align destination with byte copies */
748 ldrb r3, [r1, #-1]!
749 strb r3, [r0, #-1]!
750 ldrbge r3, [r1, #-1]!
751 strbge r3, [r0, #-1]!
752 ldrbgt r3, [r1, #-1]!
753 strbgt r3, [r0, #-1]!
754 subs r2, r2, r12
755 blt .Lmemmove_bl4 /* less than 4 bytes to go */
756 ands r12, r1, #3
757 beq .Lmemmove_bt8 /* we have an aligned source */
758
759 /* erg - unaligned source */
760 /* This is where it gets nasty ... */
761 .Lmemmove_bsrcul:
/* Mirror of the forward fixup: round src down, pre-load the word at the
 * high end into r3, dispatch on misalignment 1/2/3. */
762 bic r1, r1, #3
763 ldr r3, [r1, #0]
764 cmp r12, #2
765 blt .Lmemmove_bsrcul1
766 beq .Lmemmove_bsrcul2
767 cmp r2, #0x0c
768 blt .Lmemmove_bsrcul3loop4
769 sub r2, r2, #0x0c
770 stmdb sp!, {r4, r5, lr}
771
772 .Lmemmove_bsrcul3loop16:
773 #ifdef __ARMEB__
774 mov lr, r3, lsr #8
775 #else
776 mov lr, r3, lsl #8
777 #endif
778 ldmdb r1!, {r3-r5, r12}
779 #ifdef __ARMEB__
780 orr lr, lr, r12, lsl #24
781 mov r12, r12, lsr #8
782 orr r12, r12, r5, lsl #24
783 mov r5, r5, lsr #8
784 orr r5, r5, r4, lsl #24
785 mov r4, r4, lsr #8
786 orr r4, r4, r3, lsl #24
787 #else
788 orr lr, lr, r12, lsr #24
789 mov r12, r12, lsl #8
790 orr r12, r12, r5, lsr #24
791 mov r5, r5, lsl #8
792 orr r5, r5, r4, lsr #24
793 mov r4, r4, lsl #8
794 orr r4, r4, r3, lsr #24
795 #endif
796 stmdb r0!, {r4, r5, r12, lr}
797 subs r2, r2, #0x10
798 bge .Lmemmove_bsrcul3loop16
799 ldmia sp!, {r4, r5, lr}
800 adds r2, r2, #0x0c
801 blt .Lmemmove_bsrcul3l4
802
803 .Lmemmove_bsrcul3loop4:
804 #ifdef __ARMEB__
805 mov r12, r3, lsr #8
806 #else
807 mov r12, r3, lsl #8
808 #endif
809 ldr r3, [r1, #-4]!
810 #ifdef __ARMEB__
811 orr r12, r12, r3, lsl #24
812 #else
813 orr r12, r12, r3, lsr #24
814 #endif
815 str r12, [r0, #-4]!
816 subs r2, r2, #4
817 bge .Lmemmove_bsrcul3loop4
818
819 .Lmemmove_bsrcul3l4:
820 add r1, r1, #3
821 b .Lmemmove_bl4
822
823 .Lmemmove_bsrcul2:
824 cmp r2, #0x0c
825 blt .Lmemmove_bsrcul2loop4
826 sub r2, r2, #0x0c
827 stmdb sp!, {r4, r5, lr}
828
829 .Lmemmove_bsrcul2loop16:
830 #ifdef __ARMEB__
831 mov lr, r3, lsr #16
832 #else
833 mov lr, r3, lsl #16
834 #endif
835 ldmdb r1!, {r3-r5, r12}
836 #ifdef __ARMEB__
837 orr lr, lr, r12, lsl #16
838 mov r12, r12, lsr #16
839 orr r12, r12, r5, lsl #16
840 mov r5, r5, lsr #16
841 orr r5, r5, r4, lsl #16
842 mov r4, r4, lsr #16
843 orr r4, r4, r3, lsl #16
844 #else
845 orr lr, lr, r12, lsr #16
846 mov r12, r12, lsl #16
847 orr r12, r12, r5, lsr #16
848 mov r5, r5, lsl #16
849 orr r5, r5, r4, lsr #16
850 mov r4, r4, lsl #16
851 orr r4, r4, r3, lsr #16
852 #endif
853 stmdb r0!, {r4, r5, r12, lr}
854 subs r2, r2, #0x10
855 bge .Lmemmove_bsrcul2loop16
856 ldmia sp!, {r4, r5, lr}
857 adds r2, r2, #0x0c
858 blt .Lmemmove_bsrcul2l4
859
860 .Lmemmove_bsrcul2loop4:
861 #ifdef __ARMEB__
862 mov r12, r3, lsr #16
863 #else
864 mov r12, r3, lsl #16
865 #endif
866 ldr r3, [r1, #-4]!
867 #ifdef __ARMEB__
868 orr r12, r12, r3, lsl #16
869 #else
870 orr r12, r12, r3, lsr #16
871 #endif
872 str r12, [r0, #-4]!
873 subs r2, r2, #4
874 bge .Lmemmove_bsrcul2loop4
875
876 .Lmemmove_bsrcul2l4:
877 add r1, r1, #2
878 b .Lmemmove_bl4
879
880 .Lmemmove_bsrcul1:
881 cmp r2, #0x0c
882 blt .Lmemmove_bsrcul1loop4
883 sub r2, r2, #0x0c
884 stmdb sp!, {r4, r5, lr}
885
886 .Lmemmove_bsrcul1loop32:
887 #ifdef __ARMEB__
888 mov lr, r3, lsr #24
889 #else
890 mov lr, r3, lsl #24
891 #endif
892 ldmdb r1!, {r3-r5, r12}
893 #ifdef __ARMEB__
894 orr lr, lr, r12, lsl #8
895 mov r12, r12, lsr #24
896 orr r12, r12, r5, lsl #8
897 mov r5, r5, lsr #24
898 orr r5, r5, r4, lsl #8
899 mov r4, r4, lsr #24
900 orr r4, r4, r3, lsl #8
901 #else
902 orr lr, lr, r12, lsr #8
903 mov r12, r12, lsl #24
904 orr r12, r12, r5, lsr #8
905 mov r5, r5, lsl #24
906 orr r5, r5, r4, lsr #8
907 mov r4, r4, lsl #24
908 orr r4, r4, r3, lsr #8
909 #endif
910 stmdb r0!, {r4, r5, r12, lr}
911 subs r2, r2, #0x10
912 bge .Lmemmove_bsrcul1loop32
913 ldmia sp!, {r4, r5, lr}
914 adds r2, r2, #0x0c
915 blt .Lmemmove_bsrcul1l4
916
917 .Lmemmove_bsrcul1loop4:
918 #ifdef __ARMEB__
919 mov r12, r3, lsr #24
920 #else
921 mov r12, r3, lsl #24
922 #endif
923 ldr r3, [r1, #-4]!
924 #ifdef __ARMEB__
925 orr r12, r12, r3, lsl #8
926 #else
927 orr r12, r12, r3, lsr #8
928 #endif
929 str r12, [r0, #-4]!
930 subs r2, r2, #4
931 bge .Lmemmove_bsrcul1loop4
932
933 .Lmemmove_bsrcul1l4:
934 add r1, r1, #1
935 b .Lmemmove_bl4
936 EEND(memmove)
937 END(bcopy)
938
939 #if !defined(_ARM_ARCH_5E)
/*
 * void *memcpy(void *dst /* r0 */, const void *src /* r1 */, size_t len /* r2 */)
 * (non-ARMv5E variant; the ARMv5E version lives in the #else branch)
 *
 * When not executing from flash (FLASHADDR/PHYSADDR window check on pc),
 * an installed _arm_memcpy hook is tried first for copies of at least
 * *_min_memcpy_size bytes; the hook returns 0 in r0 on success.
 * Otherwise: align the destination with byte copies, then move data in
 * 32/12/4-byte chunks; the .Lmemcpy_srcul* paths stitch words together
 * when src is misaligned relative to dst by 1/2/3 bytes.  (Note these
 * paths use little-endian shifts only; the memmove code above carries
 * the __ARMEB__ variants.)
 */
940 ENTRY(memcpy)
941 /* save leaf functions having to store this away */
942 /* Do not check arm_memcpy if we're running from flash */
943 #if defined(FLASHADDR) && defined(PHYSADDR)
944 #if FLASHADDR > PHYSADDR
945 ldr r3, =FLASHADDR
946 cmp r3, pc
947 bls .Lnormal
948 #else
949 ldr r3, =FLASHADDR
950 cmp r3, pc
951 bhi .Lnormal
952 #endif
953 #endif
954 ldr r3, .L_arm_memcpy
955 ldr r3, [r3]
956 cmp r3, #0
957 beq .Lnormal
958 ldr r3, .L_min_memcpy_size
959 ldr r3, [r3]
960 cmp r2, r3
961 blt .Lnormal
962 stmfd sp!, {r0-r2, r4, lr}
963 mov r3, #0
964 ldr r4, .L_arm_memcpy
965 mov lr, pc
/* Indirect call to the hook; flags from "cmp r0, #0" survive the ldmfd
 * below, so RETeq returns iff the hook succeeded (returned 0). */
966 ldr pc, [r4]
967 cmp r0, #0
968 ldmfd sp!, {r0-r2, r4, lr}
969 RETeq
970
971 .Lnormal:
972 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
973
974 subs r2, r2, #4
975 blt .Lmemcpy_l4 /* less than 4 bytes */
976 ands r12, r0, #3
977 bne .Lmemcpy_destul /* oh unaligned destination addr */
978 ands r12, r1, #3
979 bne .Lmemcpy_srcul /* oh unaligned source addr */
980
981 .Lmemcpy_t8:
982 /* We have aligned source and destination */
983 subs r2, r2, #8
984 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
985 subs r2, r2, #0x14
986 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
987 stmdb sp!, {r4} /* borrow r4 */
988
989 /* blat 32 bytes at a time */
990 /* XXX for really big copies perhaps we should use more registers */
991 .Lmemcpy_loop32:
992 ldmia r1!, {r3, r4, r12, lr}
993 stmia r0!, {r3, r4, r12, lr}
994 ldmia r1!, {r3, r4, r12, lr}
995 stmia r0!, {r3, r4, r12, lr}
996 subs r2, r2, #0x20
997 bge .Lmemcpy_loop32
998
999 cmn r2, #0x10
1000 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
1001 stmiage r0!, {r3, r4, r12, lr}
1002 subge r2, r2, #0x10
1003 ldmia sp!, {r4} /* return r4 */
1004
1005 .Lmemcpy_l32:
1006 adds r2, r2, #0x14
1007
1008 /* blat 12 bytes at a time */
1009 .Lmemcpy_loop12:
1010 ldmiage r1!, {r3, r12, lr}
1011 stmiage r0!, {r3, r12, lr}
1012 subsge r2, r2, #0x0c
1013 bge .Lmemcpy_loop12
1014
1015 .Lmemcpy_l12:
1016 adds r2, r2, #8
1017 blt .Lmemcpy_l4
1018
1019 subs r2, r2, #4
1020 ldrlt r3, [r1], #4
1021 strlt r3, [r0], #4
1022 ldmiage r1!, {r3, r12}
1023 stmiage r0!, {r3, r12}
1024 subge r2, r2, #4
1025
1026 .Lmemcpy_l4:
1027 /* less than 4 bytes to go */
1028 adds r2, r2, #4
1029 #ifdef __APCS_26_
1030 ldmiaeq sp!, {r0, pc}^ /* done */
1031 #else
1032 ldmiaeq sp!, {r0, pc} /* done */
1033 #endif
1034 /* copy the crud byte at a time */
1035 cmp r2, #2
1036 ldrb r3, [r1], #1
1037 strb r3, [r0], #1
1038 ldrbge r3, [r1], #1
1039 strbge r3, [r0], #1
1040 ldrbgt r3, [r1], #1
1041 strbgt r3, [r0], #1
1042 ldmia sp!, {r0, pc}
1043
1044 /* erg - unaligned destination */
1045 .Lmemcpy_destul:
1046 rsb r12, r12, #4
1047 cmp r12, #2
1048
1049 /* align destination with byte copies */
1050 ldrb r3, [r1], #1
1051 strb r3, [r0], #1
1052 ldrbge r3, [r1], #1
1053 strbge r3, [r0], #1
1054 ldrbgt r3, [r1], #1
1055 strbgt r3, [r0], #1
1056 subs r2, r2, r12
1057 blt .Lmemcpy_l4 /* less the 4 bytes */
1058
1059 ands r12, r1, #3
1060 beq .Lmemcpy_t8 /* we have an aligned source */
1061
1062 /* erg - unaligned source */
1063 /* This is where it gets nasty ... */
1064 .Lmemcpy_srcul:
/* r12 = src misalignment (1-3): round src down to a word boundary,
 * pre-load the first word into lr, dispatch on the byte offset. */
1065 bic r1, r1, #3
1066 ldr lr, [r1], #4
1067 cmp r12, #2
1068 bgt .Lmemcpy_srcul3
1069 beq .Lmemcpy_srcul2
1070 cmp r2, #0x0c
1071 blt .Lmemcpy_srcul1loop4
1072 sub r2, r2, #0x0c
1073 stmdb sp!, {r4, r5}
1074
1075 .Lmemcpy_srcul1loop16:
1076 mov r3, lr, lsr #8
1077 ldmia r1!, {r4, r5, r12, lr}
1078 orr r3, r3, r4, lsl #24
1079 mov r4, r4, lsr #8
1080 orr r4, r4, r5, lsl #24
1081 mov r5, r5, lsr #8
1082 orr r5, r5, r12, lsl #24
1083 mov r12, r12, lsr #8
1084 orr r12, r12, lr, lsl #24
1085 stmia r0!, {r3-r5, r12}
1086 subs r2, r2, #0x10
1087 bge .Lmemcpy_srcul1loop16
1088 ldmia sp!, {r4, r5}
1089 adds r2, r2, #0x0c
1090 blt .Lmemcpy_srcul1l4
1091
1092 .Lmemcpy_srcul1loop4:
1093 mov r12, lr, lsr #8
1094 ldr lr, [r1], #4
1095 orr r12, r12, lr, lsl #24
1096 str r12, [r0], #4
1097 subs r2, r2, #4
1098 bge .Lmemcpy_srcul1loop4
1099
1100 .Lmemcpy_srcul1l4:
1101 sub r1, r1, #3
1102 b .Lmemcpy_l4
1103
1104 .Lmemcpy_srcul2:
1105 cmp r2, #0x0c
1106 blt .Lmemcpy_srcul2loop4
1107 sub r2, r2, #0x0c
1108 stmdb sp!, {r4, r5}
1109
1110 .Lmemcpy_srcul2loop16:
1111 mov r3, lr, lsr #16
1112 ldmia r1!, {r4, r5, r12, lr}
1113 orr r3, r3, r4, lsl #16
1114 mov r4, r4, lsr #16
1115 orr r4, r4, r5, lsl #16
1116 mov r5, r5, lsr #16
1117 orr r5, r5, r12, lsl #16
1118 mov r12, r12, lsr #16
1119 orr r12, r12, lr, lsl #16
1120 stmia r0!, {r3-r5, r12}
1121 subs r2, r2, #0x10
1122 bge .Lmemcpy_srcul2loop16
1123 ldmia sp!, {r4, r5}
1124 adds r2, r2, #0x0c
1125 blt .Lmemcpy_srcul2l4
1126
1127 .Lmemcpy_srcul2loop4:
1128 mov r12, lr, lsr #16
1129 ldr lr, [r1], #4
1130 orr r12, r12, lr, lsl #16
1131 str r12, [r0], #4
1132 subs r2, r2, #4
1133 bge .Lmemcpy_srcul2loop4
1134
1135 .Lmemcpy_srcul2l4:
1136 sub r1, r1, #2
1137 b .Lmemcpy_l4
1138
1139 .Lmemcpy_srcul3:
1140 cmp r2, #0x0c
1141 blt .Lmemcpy_srcul3loop4
1142 sub r2, r2, #0x0c
1143 stmdb sp!, {r4, r5}
1144
1145 .Lmemcpy_srcul3loop16:
1146 mov r3, lr, lsr #24
1147 ldmia r1!, {r4, r5, r12, lr}
1148 orr r3, r3, r4, lsl #8
1149 mov r4, r4, lsr #24
1150 orr r4, r4, r5, lsl #8
1151 mov r5, r5, lsr #24
1152 orr r5, r5, r12, lsl #8
1153 mov r12, r12, lsr #24
1154 orr r12, r12, lr, lsl #8
1155 stmia r0!, {r3-r5, r12}
1156 subs r2, r2, #0x10
1157 bge .Lmemcpy_srcul3loop16
1158 ldmia sp!, {r4, r5}
1159 adds r2, r2, #0x0c
1160 blt .Lmemcpy_srcul3l4
1161
1162 .Lmemcpy_srcul3loop4:
1163 mov r12, lr, lsr #24
1164 ldr lr, [r1], #4
1165 orr r12, r12, lr, lsl #8
1166 str r12, [r0], #4
1167 subs r2, r2, #4
1168 bge .Lmemcpy_srcul3loop4
1169
1170 .Lmemcpy_srcul3l4:
1171 sub r1, r1, #1
1172 b .Lmemcpy_l4
1173 END(memcpy)
1174
1175 #else
1176 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * In:  r0 = dst, r1 = src, r2 = len
 * Out: r0 = dst (returned unchanged)
 *
 * Forward copy only.  r3 is used as the working destination pointer
 * throughout so that r0 can be returned intact.  ip (r12) is scratch.
 */
ENTRY(memcpy)
        pld     [r1]                    /* Start prefetching the source */
        cmp     r2, #0x0c
        ble     .Lmemcpy_short          /* <= 12 bytes: dedicated cases */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
        ldr     r3, =FLASHADDR
        cmp     r3, pc
        bls     .Lnormal                /* Not executing from flash */
#else
        ldr     r3, =FLASHADDR
        cmp     r3, pc
        bhi     .Lnormal                /* Not executing from flash */
#endif
#endif
        /*
         * If a platform copy helper has been registered (the pointer
         * behind .L_arm_memcpy is non-NULL) and the length is at least
         * the helper's minimum, try it first.  The helper returns 0 in
         * r0 on success.
         * NOTE(review): the .L_arm_memcpy / .L_min_memcpy_size literal
         * pool entries are defined elsewhere in this file — confirm.
         */
        ldr     r3, .L_arm_memcpy
        ldr     r3, [r3]
        cmp     r3, #0                  /* No helper registered? */
        beq     .Lnormal
        ldr     r3, .L_min_memcpy_size
        ldr     r3, [r3]
        cmp     r2, r3                  /* Below the helper's threshold? */
        blt     .Lnormal
        stmfd   sp!, {r0-r2, r4, lr}
        mov     r3, #0                  /* 4th arg to helper; its meaning
                                         * is defined by the helper ABI */
        ldr     r4, .L_arm_memcpy
        mov     lr, pc                  /* Set return address, then ... */
        ldr     pc, [r4]                /* ... call through the pointer */
        cmp     r0, #0                  /* 0 => helper did the copy */
        ldmfd   sp!, {r0-r2, r4, lr}    /* (does not disturb the flags) */
        RETeq
.Lnormal:
        mov     r3, r0                  /* We must not clobber r0 */

        /* Word-align the destination buffer */
        ands    ip, r3, #0x03           /* Already word aligned? */
        beq     .Lmemcpy_wordaligned    /* Yup */
        cmp     ip, #0x02               /* Need 4 - ip lead-in bytes */
        ldrb    ip, [r1], #0x01
        sub     r2, r2, #0x01
        strb    ip, [r3], #0x01         /* 1st byte (always) */
        ldrble  ip, [r1], #0x01
        suble   r2, r2, #0x01
        strble  ip, [r3], #0x01         /* 2nd byte if (dst & 3) <= 2 */
        ldrblt  ip, [r1], #0x01
        sublt   r2, r2, #0x01
        strblt  ip, [r3], #0x01         /* 3rd byte if (dst & 3) == 1 */

        /* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
        ands    ip, r1, #0x03           /* Is src also word-aligned? */
        bne     .Lmemcpy_bad_align      /* Nope. Things just got bad */

        /* Quad-align the destination buffer (for strd) */
        tst     r3, #0x07               /* Already quad aligned? */
        ldrne   ip, [r1], #0x04
        stmfd   sp!, {r4-r9}            /* Free up some registers */
        subne   r2, r2, #0x04
        strne   ip, [r3], #0x04

        /* Destination buffer quad aligned, source is at least word aligned */
        subs    r2, r2, #0x80
        blt     .Lmemcpy_w_lessthan128

        /*
         * Copy 128 bytes at a time.  Loads and strd stores are
         * interleaved so each strd's register pair is ready well
         * before it issues; pld keeps the source stream ahead.
         */
.Lmemcpy_w_loop128:
        ldr     r4, [r1], #0x04         /* LD:00-03 */
        ldr     r5, [r1], #0x04         /* LD:04-07 */
        pld     [r1, #0x18]             /* Prefetch 0x20 */
        ldr     r6, [r1], #0x04         /* LD:08-0b */
        ldr     r7, [r1], #0x04         /* LD:0c-0f */
        ldr     r8, [r1], #0x04         /* LD:10-13 */
        ldr     r9, [r1], #0x04         /* LD:14-17 */
        strd    r4, [r3], #0x08         /* ST:00-07 */
        ldr     r4, [r1], #0x04         /* LD:18-1b */
        ldr     r5, [r1], #0x04         /* LD:1c-1f */
        strd    r6, [r3], #0x08         /* ST:08-0f */
        ldr     r6, [r1], #0x04         /* LD:20-23 */
        ldr     r7, [r1], #0x04         /* LD:24-27 */
        pld     [r1, #0x18]             /* Prefetch 0x40 */
        strd    r8, [r3], #0x08         /* ST:10-17 */
        ldr     r8, [r1], #0x04         /* LD:28-2b */
        ldr     r9, [r1], #0x04         /* LD:2c-2f */
        strd    r4, [r3], #0x08         /* ST:18-1f */
        ldr     r4, [r1], #0x04         /* LD:30-33 */
        ldr     r5, [r1], #0x04         /* LD:34-37 */
        strd    r6, [r3], #0x08         /* ST:20-27 */
        ldr     r6, [r1], #0x04         /* LD:38-3b */
        ldr     r7, [r1], #0x04         /* LD:3c-3f */
        strd    r8, [r3], #0x08         /* ST:28-2f */
        ldr     r8, [r1], #0x04         /* LD:40-43 */
        ldr     r9, [r1], #0x04         /* LD:44-47 */
        pld     [r1, #0x18]             /* Prefetch 0x60 */
        strd    r4, [r3], #0x08         /* ST:30-37 */
        ldr     r4, [r1], #0x04         /* LD:48-4b */
        ldr     r5, [r1], #0x04         /* LD:4c-4f */
        strd    r6, [r3], #0x08         /* ST:38-3f */
        ldr     r6, [r1], #0x04         /* LD:50-53 */
        ldr     r7, [r1], #0x04         /* LD:54-57 */
        strd    r8, [r3], #0x08         /* ST:40-47 */
        ldr     r8, [r1], #0x04         /* LD:58-5b */
        ldr     r9, [r1], #0x04         /* LD:5c-5f */
        strd    r4, [r3], #0x08         /* ST:48-4f */
        ldr     r4, [r1], #0x04         /* LD:60-63 */
        ldr     r5, [r1], #0x04         /* LD:64-67 */
        pld     [r1, #0x18]             /* Prefetch 0x80 */
        strd    r6, [r3], #0x08         /* ST:50-57 */
        ldr     r6, [r1], #0x04         /* LD:68-6b */
        ldr     r7, [r1], #0x04         /* LD:6c-6f */
        strd    r8, [r3], #0x08         /* ST:58-5f */
        ldr     r8, [r1], #0x04         /* LD:70-73 */
        ldr     r9, [r1], #0x04         /* LD:74-77 */
        strd    r4, [r3], #0x08         /* ST:60-67 */
        ldr     r4, [r1], #0x04         /* LD:78-7b */
        ldr     r5, [r1], #0x04         /* LD:7c-7f */
        strd    r6, [r3], #0x08         /* ST:68-6f */
        strd    r8, [r3], #0x08         /* ST:70-77 */
        subs    r2, r2, #0x80
        strd    r4, [r3], #0x08         /* ST:78-7f */
        bge     .Lmemcpy_w_loop128

.Lmemcpy_w_lessthan128:
        adds    r2, r2, #0x80           /* Adjust for extra sub */
        ldmfdeq sp!, {r4-r9}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x20
        blt     .Lmemcpy_w_lessthan32

        /* Copy 32 bytes at a time */
.Lmemcpy_w_loop32:
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        pld     [r1, #0x18]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     r8, [r1], #0x04
        ldr     r9, [r1], #0x04
        strd    r4, [r3], #0x08
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        strd    r6, [r3], #0x08
        strd    r8, [r3], #0x08
        subs    r2, r2, #0x20
        strd    r4, [r3], #0x08
        bge     .Lmemcpy_w_loop32

.Lmemcpy_w_lessthan32:
        adds    r2, r2, #0x20           /* Adjust for extra sub */
        ldmfdeq sp!, {r4-r9}
        RETeq                           /* Return now if done */

        /*
         * 8..31 bytes left.  Jump into the copy ladder below:
         * each 8-byte chunk is 4 instructions (16 bytes), and
         * r4 = (0x18 - (r2 & 0x18)) so "r4, lsl #1" skips exactly
         * the chunks we don't need (pc reads as '.'+8, i.e. the
         * first ladder instruction).  r4 == 0 falls through the nop.
         */
        and     r4, r2, #0x18
        rsbs    r4, r4, #0x18
        addne   pc, pc, r4, lsl #1
        nop                             /* Fall-through filler */

        /* At least 24 bytes remaining */
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        sub     r2, r2, #0x08
        strd    r4, [r3], #0x08

        /* At least 16 bytes remaining */
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        sub     r2, r2, #0x08
        strd    r4, [r3], #0x08

        /* At least 8 bytes remaining */
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        subs    r2, r2, #0x08
        strd    r4, [r3], #0x08

        /* Less than 8 bytes remaining */
        ldmfd   sp!, {r4-r9}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        ldrge   ip, [r1], #0x04         /* One final word, if any */
        strge   ip, [r3], #0x04
        RETeq                           /* Return now if done */
        addlt   r2, r2, #0x04           /* 1..3 trailing bytes */
        ldrb    ip, [r1], #0x01
        cmp     r2, #0x02
        ldrbge  r2, [r1], #0x01
        strb    ip, [r3], #0x01
        ldrbgt  ip, [r1]
        strbge  r2, [r3], #0x01
        strbgt  ip, [r3]
        RET
        /* Place a literal pool here for the above ldr instructions to use */
        .ltorg
1369
1370
/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r3) is word aligned, but the source buffer
 * is not: on entry ip = (src & 3), which is non-zero and selects one
 * of three fix-up paths (bad1/bad2/bad3).  Each path rounds r1 down
 * to a word boundary, keeps the current partially-consumed source
 * word in ip, and re-combines successive words with paired shifts.
 * r2 = bytes remaining.
 */
.Lmemcpy_bad_align:
        stmfd   sp!, {r4-r7}            /* Need more scratch registers */
        bic     r1, r1, #0x03           /* Round src down to a word */
        cmp     ip, #2
        ldr     ip, [r1], #0x04         /* Preload first aligned word */
        bgt     .Lmemcpy_bad3           /* (src & 3) == 3 */
        beq     .Lmemcpy_bad2           /* (src & 3) == 2 */
        b       .Lmemcpy_bad1           /* (src & 3) == 1 */

        /* Source is 1 byte past a word boundary: shift by 8/24 */
.Lmemcpy_bad1_loop16:
#ifdef __ARMEB__
        mov     r4, ip, lsl #8
#else
        mov     r4, ip, lsr #8
#endif
        ldr     r5, [r1], #0x04
        pld     [r1, #0x018]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     ip, [r1], #0x04
#ifdef __ARMEB__
        orr     r4, r4, r5, lsr #24
        mov     r5, r5, lsl #8
        orr     r5, r5, r6, lsr #24
        mov     r6, r6, lsl #8
        orr     r6, r6, r7, lsr #24
        mov     r7, r7, lsl #8
        orr     r7, r7, ip, lsr #24
#else
        orr     r4, r4, r5, lsl #24
        mov     r5, r5, lsr #8
        orr     r5, r5, r6, lsl #24
        mov     r6, r6, lsr #8
        orr     r6, r6, r7, lsl #24
        mov     r7, r7, lsr #8
        orr     r7, r7, ip, lsl #24
#endif
        str     r4, [r3], #0x04
        str     r5, [r3], #0x04
        str     r6, [r3], #0x04
        str     r7, [r3], #0x04
.Lmemcpy_bad1:
        subs    r2, r2, #0x10
        bge     .Lmemcpy_bad1_loop16

        adds    r2, r2, #0x10
        ldmfdeq sp!, {r4-r7}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        sublt   r1, r1, #0x03           /* Undo round-down + readahead */
        blt     .Lmemcpy_bad_done

.Lmemcpy_bad1_loop4:
#ifdef __ARMEB__
        mov     r4, ip, lsl #8
#else
        mov     r4, ip, lsr #8
#endif
        ldr     ip, [r1], #0x04
        subs    r2, r2, #0x04
#ifdef __ARMEB__
        orr     r4, r4, ip, lsr #24
#else
        orr     r4, r4, ip, lsl #24
#endif
        str     r4, [r3], #0x04
        bge     .Lmemcpy_bad1_loop4
        sub     r1, r1, #0x03           /* Undo round-down + readahead */
        b       .Lmemcpy_bad_done

        /* Source is 2 bytes past a word boundary: shift by 16/16 */
.Lmemcpy_bad2_loop16:
#ifdef __ARMEB__
        mov     r4, ip, lsl #16
#else
        mov     r4, ip, lsr #16
#endif
        ldr     r5, [r1], #0x04
        pld     [r1, #0x018]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     ip, [r1], #0x04
#ifdef __ARMEB__
        orr     r4, r4, r5, lsr #16
        mov     r5, r5, lsl #16
        orr     r5, r5, r6, lsr #16
        mov     r6, r6, lsl #16
        orr     r6, r6, r7, lsr #16
        mov     r7, r7, lsl #16
        orr     r7, r7, ip, lsr #16
#else
        orr     r4, r4, r5, lsl #16
        mov     r5, r5, lsr #16
        orr     r5, r5, r6, lsl #16
        mov     r6, r6, lsr #16
        orr     r6, r6, r7, lsl #16
        mov     r7, r7, lsr #16
        orr     r7, r7, ip, lsl #16
#endif
        str     r4, [r3], #0x04
        str     r5, [r3], #0x04
        str     r6, [r3], #0x04
        str     r7, [r3], #0x04
.Lmemcpy_bad2:
        subs    r2, r2, #0x10
        bge     .Lmemcpy_bad2_loop16

        adds    r2, r2, #0x10
        ldmfdeq sp!, {r4-r7}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        sublt   r1, r1, #0x02           /* Undo round-down + readahead */
        blt     .Lmemcpy_bad_done

.Lmemcpy_bad2_loop4:
#ifdef __ARMEB__
        mov     r4, ip, lsl #16
#else
        mov     r4, ip, lsr #16
#endif
        ldr     ip, [r1], #0x04
        subs    r2, r2, #0x04
#ifdef __ARMEB__
        orr     r4, r4, ip, lsr #16
#else
        orr     r4, r4, ip, lsl #16
#endif
        str     r4, [r3], #0x04
        bge     .Lmemcpy_bad2_loop4
        sub     r1, r1, #0x02           /* Undo round-down + readahead */
        b       .Lmemcpy_bad_done

        /* Source is 3 bytes past a word boundary: shift by 24/8 */
.Lmemcpy_bad3_loop16:
#ifdef __ARMEB__
        mov     r4, ip, lsl #24
#else
        mov     r4, ip, lsr #24
#endif
        ldr     r5, [r1], #0x04
        pld     [r1, #0x018]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     ip, [r1], #0x04
#ifdef __ARMEB__
        orr     r4, r4, r5, lsr #8
        mov     r5, r5, lsl #24
        orr     r5, r5, r6, lsr #8
        mov     r6, r6, lsl #24
        orr     r6, r6, r7, lsr #8
        mov     r7, r7, lsl #24
        orr     r7, r7, ip, lsr #8
#else
        orr     r4, r4, r5, lsl #8
        mov     r5, r5, lsr #24
        orr     r5, r5, r6, lsl #8
        mov     r6, r6, lsr #24
        orr     r6, r6, r7, lsl #8
        mov     r7, r7, lsr #24
        orr     r7, r7, ip, lsl #8
#endif
        str     r4, [r3], #0x04
        str     r5, [r3], #0x04
        str     r6, [r3], #0x04
        str     r7, [r3], #0x04
.Lmemcpy_bad3:
        subs    r2, r2, #0x10
        bge     .Lmemcpy_bad3_loop16

        adds    r2, r2, #0x10
        ldmfdeq sp!, {r4-r7}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        sublt   r1, r1, #0x01           /* Undo round-down + readahead */
        blt     .Lmemcpy_bad_done

.Lmemcpy_bad3_loop4:
#ifdef __ARMEB__
        mov     r4, ip, lsl #24
#else
        mov     r4, ip, lsr #24
#endif
        ldr     ip, [r1], #0x04
        subs    r2, r2, #0x04
#ifdef __ARMEB__
        orr     r4, r4, ip, lsr #8
#else
        orr     r4, r4, ip, lsl #8
#endif
        str     r4, [r3], #0x04
        bge     .Lmemcpy_bad3_loop4
        sub     r1, r1, #0x01           /* Undo round-down + readahead */

.Lmemcpy_bad_done:
        ldmfd   sp!, {r4-r7}
        adds    r2, r2, #0x04           /* 0..3 trailing bytes left */
        RETeq
        ldrb    ip, [r1], #0x01
        cmp     r2, #0x02
        ldrbge  r2, [r1], #0x01
        strb    ip, [r3], #0x01
        ldrbgt  ip, [r1]
        strbge  r2, [r3], #0x01
        strbgt  ip, [r3]
        RET
1578
1579
/*
 * Handle short copies (less than 16 bytes), possibly misaligned.
 * Some of these are *very* common, thanks to the network stack,
 * and so are handled specially.
 *
 * Entered with r0 = dst, r1 = src, r2 = len (0..12).  Dispatches
 * through a branch table indexed by length: pc reads as '.'+8, so
 * "add pc, pc, r2, lsl #2" lands on entry r2 of the table after
 * the nop.
 */
.Lmemcpy_short:
        add     pc, pc, r2, lsl #2      /* Jump to table entry len */
        nop                             /* Table alignment filler */
        RET                             /* 0x00 */
        b       .Lmemcpy_bytewise       /* 0x01 */
        b       .Lmemcpy_bytewise       /* 0x02 */
        b       .Lmemcpy_bytewise       /* 0x03 */
        b       .Lmemcpy_4              /* 0x04 */
        b       .Lmemcpy_bytewise       /* 0x05 */
        b       .Lmemcpy_6              /* 0x06 */
        b       .Lmemcpy_bytewise       /* 0x07 */
        b       .Lmemcpy_8              /* 0x08 */
        b       .Lmemcpy_bytewise       /* 0x09 */
        b       .Lmemcpy_bytewise       /* 0x0a */
        b       .Lmemcpy_bytewise       /* 0x0b */
        b       .Lmemcpy_c              /* 0x0c */

        /* Generic byte-at-a-time copy; r2 >= 1 on entry */
.Lmemcpy_bytewise:
        mov     r3, r0                  /* We must not clobber r0 */
        ldrb    ip, [r1], #0x01
1:      subs    r2, r2, #0x01
        strb    ip, [r3], #0x01
        ldrbne  ip, [r1], #0x01
        bne     1b
        RET
1609
/******************************************************************************
 * Special case for 4 byte copies
 *
 * Dispatch index = ((dst & 3) << 2) | (src & 3).  Each of the 16 cases
 * is padded to exactly 64 bytes (LMEMCPY_4_PAD) so the computed branch
 * "r3 + index * 64" lands on the right one; r3 is the address of case
 * 0000 (pc reads '.'+8, and the five dispatcher instructions span
 * 0x14 bytes).  Case 0000 simply falls through.
 */
#define LMEMCPY_4_LOG2  6       /* 64 bytes */
#define LMEMCPY_4_PAD   .align LMEMCPY_4_LOG2
        LMEMCPY_4_PAD
.Lmemcpy_4:
        and     r2, r1, #0x03           /* r2 = src & 3 */
        orr     r2, r2, r0, lsl #2      /* r2 |= (dst & 3) << 2 */
        ands    r2, r2, #0x0f
        sub     r3, pc, #0x14           /* r3 = &case 0000 */
        addne   pc, r3, r2, lsl #LMEMCPY_4_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
        str     r2, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
        ldr     r2, [r1, #3]            /* BE:r2 = 3xxx  LE:r2 = xxx3 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #8          /* r3 = 012. */
        orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
#else
        mov     r3, r3, lsr #8          /* r3 = .210 */
        orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
#endif
        str     r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
#ifdef __ARMEB__
        ldrh    r3, [r1]
        ldrh    r2, [r1, #0x02]
#else
        ldrh    r3, [r1, #0x02]
        ldrh    r2, [r1]
#endif
        orr     r3, r2, r3, lsl #16
        str     r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-3]           /* BE:r3 = xxx0  LE:r3 = 0xxx */
        ldr     r2, [r1, #1]            /* BE:r2 = 123x  LE:r2 = x321 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #24         /* r3 = 0... */
        orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
#else
        mov     r3, r3, lsr #24         /* r3 = ...0 */
        orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
#endif
        str     r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
#ifdef __ARMEB__
        strb    r2, [r0, #0x03]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strb    r1, [r0]
#else
        strb    r2, [r0]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strb    r1, [r0, #0x03]
#endif
        strh    r3, [r0, #0x01]
        RET
        LMEMCPY_4_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
#ifdef __ARMEB__
        mov     r1, r2, lsr #8          /* r1 = ...0 */
        strb    r1, [r0]
        mov     r2, r2, lsl #8          /* r2 = .01. */
        orr     r2, r2, r3, lsr #8      /* r2 = .012 */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r3, lsl #8      /* r2 = .321 */
        mov     r3, r3, lsr #8          /* r3 = ...3 */
#endif
        strh    r2, [r0, #0x01]
        strb    r3, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
#ifdef __ARMEB__
        strh    r2, [r0, #0x02]
        mov     r3, r2, lsr #16
        strh    r3, [r0]
#else
        strh    r2, [r0]
        mov     r3, r2, lsr #16
        strh    r3, [r0, #0x02]
#endif
        RET
        LMEMCPY_4_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
        ldr     r3, [r1, #3]            /* BE:r3 = 3xxx  LE:r3 = xxx3 */
        mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
        strh    r1, [r0]
#ifdef __ARMEB__
        mov     r2, r2, lsl #8          /* r2 = 012. */
        orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
#else
        mov     r2, r2, lsr #24         /* r2 = ...2 */
        orr     r2, r2, r3, lsl #8      /* r2 = xx32 */
#endif
        strh    r2, [r0, #0x02]
        RET
        LMEMCPY_4_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]
        ldrh    r3, [r1, #0x02]
        strh    r2, [r0]
        strh    r3, [r0, #0x02]
        RET
        LMEMCPY_4_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #1]            /* BE:r3 = 123x  LE:r3 = x321 */
        ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
        mov     r1, r3, lsr #8          /* BE:r1 = .123  LE:r1 = .x32 */
        strh    r1, [r0, #0x02]
#ifdef __ARMEB__
        mov     r3, r3, lsr #24         /* r3 = ...1 */
        orr     r3, r3, r2, lsl #8      /* r3 = xx01 */
#else
        mov     r3, r3, lsl #8          /* r3 = 321. */
        orr     r3, r3, r2, lsr #24     /* r3 = 3210 */
#endif
        strh    r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
#ifdef __ARMEB__
        strb    r2, [r0, #0x03]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strh    r3, [r0, #0x01]
        strb    r1, [r0]
#else
        strb    r2, [r0]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
#endif
        RET
        LMEMCPY_4_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
#ifdef __ARMEB__
        ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        strb    r3, [r0, #0x03]
        mov     r3, r3, lsr #8          /* r3 = ...2 */
        orr     r3, r3, r2, lsl #8      /* r3 = ..12 */
        strh    r3, [r0, #0x01]
        mov     r2, r2, lsr #8          /* r2 = ...0 */
        strb    r2, [r0]
#else
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r3, lsl #8      /* r2 = .321 */
        strh    r2, [r0, #0x01]
        mov     r3, r3, lsr #8          /* r3 = ...3 */
        strb    r3, [r0, #0x03]
#endif
        RET
        LMEMCPY_4_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD
1872
1873
/******************************************************************************
 * Special case for 6 byte copies
 *
 * Same dispatch scheme as the 4-byte table: index =
 * ((dst & 3) << 2) | (src & 3); each case padded to 64 bytes;
 * r3 = address of case 0000 (the 5 dispatcher insns span 0x14 bytes
 * and pc reads '.'+8).  Case 0000 falls through.
 */
#define LMEMCPY_6_LOG2  6       /* 64 bytes */
#define LMEMCPY_6_PAD   .align LMEMCPY_6_LOG2
        LMEMCPY_6_PAD
.Lmemcpy_6:
        and     r2, r1, #0x03           /* r2 = src & 3 */
        orr     r2, r2, r0, lsl #2      /* r2 |= (dst & 3) << 2 */
        ands    r2, r2, #0x0f
        sub     r3, pc, #0x14           /* r3 = &case 0000 */
        addne   pc, r3, r2, lsl #LMEMCPY_6_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
        ldrh    r3, [r1, #0x04]
        str     r2, [r0]
        strh    r3, [r0, #0x04]
        RET
        LMEMCPY_6_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
        ldr     r3, [r1, #0x03]         /* BE:r3 = 345x  LE:r3 = x543 */
#ifdef __ARMEB__
        mov     r2, r2, lsl #8          /* r2 = 012. */
        orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
#else
        mov     r2, r2, lsr #8          /* r2 = .210 */
        orr     r2, r2, r3, lsl #24     /* r2 = 3210 */
#endif
        mov     r3, r3, lsr #8          /* BE:r3 = .345  LE:r3 = .x54 */
        str     r2, [r0]
        strh    r3, [r0, #0x04]
        RET
        LMEMCPY_6_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
        ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
#ifdef __ARMEB__
        mov     r1, r3, lsr #16         /* r1 = ..23 */
        orr     r1, r1, r2, lsl #16     /* r1 = 0123 */
        str     r1, [r0]
        strh    r3, [r0, #0x04]
#else
        mov     r1, r3, lsr #16         /* r1 = ..54 */
        orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
        str     r2, [r0]
        strh    r1, [r0, #0x04]
#endif
        RET
        LMEMCPY_6_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
        ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
        ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
#ifdef __ARMEB__
        mov     r2, r2, lsl #24         /* r2 = 0... */
        orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
        mov     r3, r3, lsl #8          /* r3 = 234. */
        orr     r1, r3, r1, lsr #24     /* r1 = 2345 */
#else
        mov     r2, r2, lsr #24         /* r2 = ...0 */
        orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
        mov     r1, r1, lsl #8          /* r1 = xx5. */
        orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
#endif
        str     r2, [r0]
        strh    r1, [r0, #0x04]
        RET
        LMEMCPY_6_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
        ldrh    r2, [r1, #0x04]         /* BE:r2 = ..45  LE:r2 = ..54 */
        mov     r1, r3, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
        strh    r1, [r0, #0x01]
#ifdef __ARMEB__
        mov     r1, r3, lsr #24         /* r1 = ...0 */
        strb    r1, [r0]
        mov     r3, r3, lsl #8          /* r3 = 123. */
        orr     r3, r3, r2, lsr #8      /* r3 = 1234 */
#else
        strb    r3, [r0]
        mov     r3, r3, lsr #24         /* r3 = ...3 */
        orr     r3, r3, r2, lsl #8      /* r3 = .543 */
        mov     r2, r2, lsr #8          /* r2 = ...5 */
#endif
        strh    r3, [r0, #0x03]
        strb    r2, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrh    ip, [r1, #0x03]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strh    ip, [r0, #0x03]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
#ifdef __ARMEB__
        mov     r3, r2, lsr #8          /* r3 = ...0 */
        strb    r3, [r0]
        strb    r1, [r0, #0x05]
        mov     r3, r1, lsr #8          /* r3 = .234 */
        strh    r3, [r0, #0x03]
        mov     r3, r2, lsl #8          /* r3 = .01. */
        orr     r3, r3, r1, lsr #24     /* r3 = .012 */
        strh    r3, [r0, #0x01]
#else
        strb    r2, [r0]
        mov     r3, r1, lsr #24
        strb    r3, [r0, #0x05]
        mov     r3, r1, lsr #8          /* r3 = .543 */
        strh    r3, [r0, #0x03]
        mov     r3, r2, lsr #8          /* r3 = ...1 */
        orr     r3, r3, r1, lsl #8      /* r3 = 4321 */
        strh    r3, [r0, #0x01]
#endif
        RET
        LMEMCPY_6_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrh    ip, [r1, #0x03]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strh    ip, [r0, #0x03]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
#ifdef __ARMEB__
        ldr     r2, [r1]                /* r2 = 0123 */
        ldrh    r3, [r1, #0x04]         /* r3 = ..45 */
        mov     r1, r2, lsr #16         /* r1 = ..01 */
        orr     r3, r3, r2, lsl#16      /* r3 = 2345 */
        strh    r1, [r0]
        str     r3, [r0, #0x02]
#else
        ldrh    r2, [r1, #0x04]         /* r2 = ..54 */
        ldr     r3, [r1]                /* r3 = 3210 */
        mov     r2, r2, lsl #16         /* r2 = 54.. */
        orr     r2, r2, r3, lsr #16     /* r2 = 5432 */
        strh    r3, [r0]
        str     r2, [r0, #0x02]
#endif
        RET
        LMEMCPY_6_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
        ldr     r2, [r1, #3]            /* BE:r2 = 345x  LE:r2 = x543 */
        mov     r1, r3, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
#ifdef __ARMEB__
        mov     r2, r2, lsr #8          /* r2 = .345 */
        orr     r2, r2, r3, lsl #24     /* r2 = 2345 */
#else
        mov     r2, r2, lsl #8          /* r2 = 543. */
        orr     r2, r2, r3, lsr #24     /* r2 = 5432 */
#endif
        strh    r1, [r0]
        str     r2, [r0, #0x02]
        RET
        LMEMCPY_6_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]
        ldr     r3, [r1, #0x02]
        strh    r2, [r0]
        str     r3, [r0, #0x02]
        RET
        LMEMCPY_6_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldrb    r3, [r1]                /* r3 = ...0 */
        ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
        ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #8          /* r3 = ..0. */
        orr     r3, r3, r2, lsr #24     /* r3 = ..01 */
        orr     r1, r1, r2, lsl #8      /* r1 = 2345 */
#else
        orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
        mov     r1, r1, lsl #24         /* r1 = 5... */
        orr     r1, r1, r2, lsr #8      /* r1 = 5432 */
#endif
        strh    r3, [r0]
        str     r1, [r0, #0x02]
        RET
        LMEMCPY_6_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
        ldrh    r1, [r1, #0x04]         /* BE:r1 = ..45  LE:r1 = ..54 */
#ifdef __ARMEB__
        mov     r3, r2, lsr #24         /* r3 = ...0 */
        strb    r3, [r0]
        mov     r2, r2, lsl #8          /* r2 = 123. */
        orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = .321 */
        orr     r2, r2, r1, lsl #24     /* r2 = 4321 */
        mov     r1, r1, lsr #8          /* r1 = ...5 */
#endif
        str     r2, [r0, #0x01]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrh    ip, [r1, #0x03]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strh    ip, [r0, #0x03]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
#ifdef __ARMEB__
        mov     r3, r2, lsr #8          /* r3 = ...0 */
        strb    r3, [r0]
        mov     r2, r2, lsl #24         /* r2 = 1... */
        orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r1, lsl #8      /* r2 = 4321 */
        mov     r1, r1, lsr #24         /* r1 = ...5 */
#endif
        str     r2, [r0, #0x01]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldr     r3, [r1, #0x01]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        str     r3, [r0, #0x01]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD
2170
2171
2172 /******************************************************************************
2173 * Special case for 8 byte copies
2174 */
2175 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2176 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2177 LMEMCPY_8_PAD
2178 .Lmemcpy_8:
2179 and r2, r1, #0x03
2180 orr r2, r2, r0, lsl #2
2181 ands r2, r2, #0x0f
2182 sub r3, pc, #0x14
2183 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2184
2185 /*
2186 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2187 */
2188 ldr r2, [r1]
2189 ldr r3, [r1, #0x04]
2190 str r2, [r0]
2191 str r3, [r0, #0x04]
2192 RET
2193 LMEMCPY_8_PAD
2194
2195 /*
2196 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2197 */
2198 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2199 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2200 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2201 #ifdef __ARMEB__
2202 mov r3, r3, lsl #8 /* r3 = 012. */
2203 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2204 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2205 #else
2206 mov r3, r3, lsr #8 /* r3 = .210 */
2207 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2208 mov r1, r1, lsl #24 /* r1 = 7... */
2209 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2210 #endif
2211 str r3, [r0]
2212 str r2, [r0, #0x04]
2213 RET
2214 LMEMCPY_8_PAD
2215
2216 /*
2217 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2218 */
2219 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2220 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2221 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2222 #ifdef __ARMEB__
2223 mov r2, r2, lsl #16 /* r2 = 01.. */
2224 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2225 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2226 #else
2227 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2228 mov r3, r3, lsr #16 /* r3 = ..54 */
2229 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2230 #endif
2231 str r2, [r0]
2232 str r3, [r0, #0x04]
2233 RET
2234 LMEMCPY_8_PAD
2235
2236 /*
2237 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2238 */
2239 ldrb r3, [r1] /* r3 = ...0 */
2240 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2241 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2242 #ifdef __ARMEB__
2243 mov r3, r3, lsl #24 /* r3 = 0... */
2244 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2245 mov r2, r2, lsl #24 /* r2 = 4... */
2246 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2247 #else
2248 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2249 mov r2, r2, lsr #24 /* r2 = ...4 */
2250 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2251 #endif
2252 str r3, [r0]
2253 str r2, [r0, #0x04]
2254 RET
2255 LMEMCPY_8_PAD
2256
2257 /*
2258 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2259 */
2260 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2261 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2262 #ifdef __ARMEB__
2263 mov r1, r3, lsr #24 /* r1 = ...0 */
2264 strb r1, [r0]
2265 mov r1, r3, lsr #8 /* r1 = .012 */
2266 strb r2, [r0, #0x07]
2267 mov r3, r3, lsl #24 /* r3 = 3... */
2268 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2269 #else
2270 strb r3, [r0]
2271 mov r1, r2, lsr #24 /* r1 = ...7 */
2272 strb r1, [r0, #0x07]
2273 mov r1, r3, lsr #8 /* r1 = .321 */
2274 mov r3, r3, lsr #24 /* r3 = ...3 */
2275 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2276 #endif
2277 strh r1, [r0, #0x01]
2278 str r3, [r0, #0x03]
2279 RET
2280 LMEMCPY_8_PAD
2281
2282 /*
2283 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2284 */
2285 ldrb r2, [r1]
2286 ldrh r3, [r1, #0x01]
2287 ldr ip, [r1, #0x03]
2288 ldrb r1, [r1, #0x07]
2289 strb r2, [r0]
2290 strh r3, [r0, #0x01]
2291 str ip, [r0, #0x03]
2292 strb r1, [r0, #0x07]
2293 RET
2294 LMEMCPY_8_PAD
2295
2296 /*
2297 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2298 */
2299 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2300 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2301 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2302 #ifdef __ARMEB__
2303 mov ip, r2, lsr #8 /* ip = ...0 */
2304 strb ip, [r0]
2305 mov ip, r2, lsl #8 /* ip = .01. */
2306 orr ip, ip, r3, lsr #24 /* ip = .012 */
2307 strb r1, [r0, #0x07]
2308 mov r3, r3, lsl #8 /* r3 = 345. */
2309 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2310 #else
2311 strb r2, [r0] /* 0 */
2312 mov ip, r1, lsr #8 /* ip = ...7 */
2313 strb ip, [r0, #0x07] /* 7 */
2314 mov ip, r2, lsr #8 /* ip = ...1 */
2315 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2316 mov r3, r3, lsr #8 /* r3 = .543 */
2317 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2318 #endif
2319 strh ip, [r0, #0x01]
2320 str r3, [r0, #0x03]
2321 RET
2322 LMEMCPY_8_PAD
2323
2324 /*
2325 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2326 */
2327 ldrb r3, [r1] /* r3 = ...0 */
2328 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2329 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2330 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2331 strb r3, [r0]
2332 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2333 #ifdef __ARMEB__
2334 strh r3, [r0, #0x01]
2335 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2336 #else
2337 strh ip, [r0, #0x01]
2338 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2339 #endif
2340 str r2, [r0, #0x03]
2341 strb r1, [r0, #0x07]
2342 RET
2343 LMEMCPY_8_PAD
2344
/*
 * Remaining 8-byte copy cases: destination is not 32-bit aligned
 * (case indices 1000-1111).  Each case loads all eight source bytes
 * using the widest naturally-aligned accesses the source alignment
 * permits, re-packs bytes with shifts and ORs (shift directions swap
 * between big- and little-endian builds; the "r2 = 3456"-style
 * comments track which source byte occupies which register lane,
 * byte 0 = first source byte), then stores with accesses sized to
 * the destination alignment.
 * NOTE(review): the case index is presumably (dst & 3) << 2 |
 * (src & 3), exactly as computed by .Lmemcpy_c below; the jump-table
 * setup for the 8-byte path lies before this view -- confirm there.
 * Each case must fit in its LMEMCPY_8_PAD slot.
 */
2345 /*
2346 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2347 */
2348 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2349 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2350 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2351 #ifdef __ARMEB__
2352 strh r1, [r0]
2353 mov r1, r3, lsr #16 /* r1 = ..45 */
2354 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */
2355 #else
2356 strh r2, [r0]
2357 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
2358 mov r3, r3, lsr #16 /* r3 = ..76 */
2359 #endif
2360 str r2, [r0, #0x02]
2361 strh r3, [r0, #0x06]
2362 RET
2363 LMEMCPY_8_PAD
2364
2365 /*
2366 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2367 */
/*
 * src sits at byte offset 1 within a word, so r1 - 1 and r1 + 3 are
 * word-aligned; the extra byte (shown as "x") is shifted out below.
 */
2368 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2369 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2370 ldrb ip, [r1, #0x07] /* ip = ...7 */
2371 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2372 strh r1, [r0]
2373 #ifdef __ARMEB__
2374 mov r1, r2, lsl #24 /* r1 = 2... */
2375 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
2376 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
2377 #else
2378 mov r1, r2, lsr #24 /* r1 = ...2 */
2379 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
2380 mov r3, r3, lsr #24 /* r3 = ...6 */
2381 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
2382 #endif
2383 str r1, [r0, #0x02]
2384 strh r3, [r0, #0x06]
2385 RET
2386 LMEMCPY_8_PAD
2387
2388 /*
2389 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2390 */
/* Matching alignments: plain halfword/word/halfword copy, no shuffling. */
2391 ldrh r2, [r1]
2392 ldr ip, [r1, #0x02]
2393 ldrh r3, [r1, #0x06]
2394 strh r2, [r0]
2395 str ip, [r0, #0x02]
2396 strh r3, [r0, #0x06]
2397 RET
2398 LMEMCPY_8_PAD
2399
2400 /*
2401 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2402 */
/* src offset is 3 mod 4, so r1 + 5 and r1 + 1 are word-aligned loads. */
2403 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
2404 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2405 ldrb ip, [r1] /* ip = ...0 */
2406 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
2407 strh r1, [r0, #0x06]
2408 #ifdef __ARMEB__
2409 mov r3, r3, lsr #24 /* r3 = ...5 */
2410 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
2411 mov r2, r2, lsr #24 /* r2 = ...1 */
2412 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
2413 #else
2414 mov r3, r3, lsl #24 /* r3 = 5... */
2415 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
2416 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
2417 #endif
2418 str r3, [r0, #0x02]
2419 strh r2, [r0]
2420 RET
2421 LMEMCPY_8_PAD
2422
2423 /*
2424 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2425 */
2426 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2427 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2428 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
2429 strh r1, [r0, #0x05]
2430 #ifdef __ARMEB__
2431 strb r3, [r0, #0x07]
2432 mov r1, r2, lsr #24 /* r1 = ...0 */
2433 strb r1, [r0]
2434 mov r2, r2, lsl #8 /* r2 = 123. */
2435 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
2436 str r2, [r0, #0x01]
2437 #else
2438 strb r2, [r0]
2439 mov r1, r3, lsr #24 /* r1 = ...7 */
2440 strb r1, [r0, #0x07]
2441 mov r2, r2, lsr #8 /* r2 = .321 */
2442 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
2443 str r2, [r0, #0x01]
2444 #endif
2445 RET
2446 LMEMCPY_8_PAD
2447
2448 /*
2449 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2450 */
/* dst and src both at offset 1 mod 4 (same phase): byte/half/word loads line up. */
2451 ldrb r3, [r1] /* r3 = ...0 */
2452 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
2453 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2454 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2455 strb r3, [r0]
2456 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
2457 #ifdef __ARMEB__
2458 strh ip, [r0, #0x05]
2459 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
2460 #else
2461 strh r3, [r0, #0x05]
2462 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
2463 #endif
2464 str r2, [r0, #0x01]
2465 strb r1, [r0, #0x07]
2466 RET
2467 LMEMCPY_8_PAD
2468
2469 /*
2470 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2471 */
2472 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2473 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2474 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2475 #ifdef __ARMEB__
2476 mov ip, r2, lsr #8 /* ip = ...0 */
2477 strb ip, [r0]
2478 mov ip, r2, lsl #24 /* ip = 1... */
2479 orr ip, ip, r3, lsr #8 /* ip = 1234 */
2480 strb r1, [r0, #0x07]
2481 mov r1, r1, lsr #8 /* r1 = ...6 */
2482 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
2483 #else
2484 strb r2, [r0]
2485 mov ip, r2, lsr #8 /* ip = ...1 */
2486 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2487 mov r2, r1, lsr #8 /* r2 = ...7 */
2488 strb r2, [r0, #0x07]
2489 mov r1, r1, lsl #8 /* r1 = .76. */
2490 orr r1, r1, r3, lsr #24 /* r1 = .765 */
2491 #endif
2492 str ip, [r0, #0x01]
2493 strh r1, [r0, #0x05]
2494 RET
2495 LMEMCPY_8_PAD
2496
2497 /*
2498 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2499 */
/* Both at offset 3 mod 4, so r1 + 1 is word-aligned and r1 + 5 halfword-aligned. */
2500 ldrb r2, [r1]
2501 ldr ip, [r1, #0x01]
2502 ldrh r3, [r1, #0x05]
2503 ldrb r1, [r1, #0x07]
2504 strb r2, [r0]
2505 str ip, [r0, #0x01]
2506 strh r3, [r0, #0x05]
2507 strb r1, [r0, #0x07]
2508 RET
2509 LMEMCPY_8_PAD
2510
2511 /******************************************************************************
2512 * Special case for 12 byte copies
2513 */
/*
 * Each alignment case below is padded to 2^LMEMCPY_C_LOG2 = 128 bytes
 * so it can be reached by indexed arithmetic on pc (see addne below).
 */
2514 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
2515 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
2516 LMEMCPY_C_PAD
2517 .Lmemcpy_c:
/*
 * On entry: r0 = dst, r1 = src, length is exactly 12.
 * Build a 4-bit dispatch index: (dst & 3) << 2 | (src & 3).
 */
2518 and r2, r1, #0x03
2519 orr r2, r2, r0, lsl #2
2520 ands r2, r2, #0x0f
/*
 * Computed goto: pc reads as (this insn + 8) on ARM, and the five
 * dispatch instructions occupy 0x14 bytes, so r3 = .Lmemcpy_c, which
 * the pad above forced to 128-byte alignment.  Case N starts at
 * .Lmemcpy_c + N * 128; index 0 (Z set, "ne" fails) falls through to
 * the fully-aligned case.  Do not add or remove instructions here
 * without adjusting the #0x14 offset.
 */
2521 sub r3, pc, #0x14
2522 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
2523
/*
 * 12-byte cases with dst 32-bit aligned (plus the matching-offset 0101
 * case).  Same technique as the 8-byte cases: widest aligned loads,
 * endian-dependent shift/OR re-packing, stores sized to dst alignment.
 * Each case must fit within its 128-byte LMEMCPY_C_PAD slot.
 */
2524 /*
2525 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2526 */
2527 ldr r2, [r1]
2528 ldr r3, [r1, #0x04]
2529 ldr r1, [r1, #0x08]
2530 str r2, [r0]
2531 str r3, [r0, #0x04]
2532 str r1, [r0, #0x08]
2533 RET
2534 LMEMCPY_C_PAD
2535
2536 /*
2537 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2538 */
/* src offset 1 mod 4: r1 + 7, r1 + 3 and r1 - 1 are word-aligned; "x" is junk. */
2539 ldrb r2, [r1, #0xb] /* r2 = ...B */
2540 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2541 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2542 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2543 #ifdef __ARMEB__
2544 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
2545 str r2, [r0, #0x08]
2546 mov r2, ip, lsr #24 /* r2 = ...7 */
2547 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
2548 mov r1, r1, lsl #8 /* r1 = 012. */
2549 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
2550 #else
2551 mov r2, r2, lsl #24 /* r2 = B... */
2552 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
2553 str r2, [r0, #0x08]
2554 mov r2, ip, lsl #24 /* r2 = 7... */
2555 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
2556 mov r1, r1, lsr #8 /* r1 = .210 */
2557 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
2558 #endif
2559 str r2, [r0, #0x04]
2560 str r1, [r0]
2561 RET
2562 LMEMCPY_C_PAD
2563
2564 /*
2565 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2566 */
2567 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2568 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2569 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2570 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2571 #ifdef __ARMEB__
2572 mov r2, r2, lsl #16 /* r2 = 01.. */
2573 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2574 str r2, [r0]
2575 mov r3, r3, lsl #16 /* r3 = 45.. */
2576 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
2577 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
2578 #else
2579 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2580 str r2, [r0]
2581 mov r3, r3, lsr #16 /* r3 = ..54 */
2582 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
2583 mov r1, r1, lsl #16 /* r1 = BA.. */
2584 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
2585 #endif
2586 str r3, [r0, #0x04]
2587 str r1, [r0, #0x08]
2588 RET
2589 LMEMCPY_C_PAD
2590
2591 /*
2592 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2593 */
/* src offset 3 mod 4: r1 + 1, r1 + 5, r1 + 9 are word-aligned; "x" is junk. */
2594 ldrb r2, [r1] /* r2 = ...0 */
2595 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2596 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2597 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2598 #ifdef __ARMEB__
2599 mov r2, r2, lsl #24 /* r2 = 0... */
2600 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
2601 str r2, [r0]
2602 mov r3, r3, lsl #24 /* r3 = 4... */
2603 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
2604 mov r1, r1, lsr #8 /* r1 = .9AB */
2605 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
2606 #else
2607 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
2608 str r2, [r0]
2609 mov r3, r3, lsr #24 /* r3 = ...4 */
2610 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
2611 mov r1, r1, lsl #8 /* r1 = BA9. */
2612 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
2613 #endif
2614 str r3, [r0, #0x04]
2615 str r1, [r0, #0x08]
2616 RET
2617 LMEMCPY_C_PAD
2618
2619 /*
2620 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2621 */
2622 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2623 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2624 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
2625 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
2626 strh r1, [r0, #0x01]
2627 #ifdef __ARMEB__
2628 mov r1, r2, lsr #24 /* r1 = ...0 */
2629 strb r1, [r0]
2630 mov r1, r2, lsl #24 /* r1 = 3... */
2631 orr r2, r1, r3, lsr #8 /* r1 = 3456 */
2632 mov r1, r3, lsl #24 /* r1 = 7... */
2633 orr r1, r1, ip, lsr #8 /* r1 = 789A */
2634 #else
2635 strb r2, [r0]
2636 mov r1, r2, lsr #24 /* r1 = ...3 */
2637 orr r2, r1, r3, lsl #8 /* r1 = 6543 */
2638 mov r1, r3, lsr #24 /* r1 = ...7 */
2639 orr r1, r1, ip, lsl #8 /* r1 = A987 */
2640 mov ip, ip, lsr #24 /* ip = ...B */
2641 #endif
2642 str r2, [r0, #0x03]
2643 str r1, [r0, #0x07]
/* On BE, byte B is already the low byte of ip, so no shift was needed above. */
2644 strb ip, [r0, #0x0b]
2645 RET
2646 LMEMCPY_C_PAD
2647
2648 /*
2649 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2650 */
/* Same phase (both offset 1 mod 4): aligned byte/half/word copies, no shuffling. */
2651 ldrb r2, [r1]
2652 ldrh r3, [r1, #0x01]
2653 ldr ip, [r1, #0x03]
2654 strb r2, [r0]
2655 ldr r2, [r1, #0x07]
2656 ldrb r1, [r1, #0x0b]
2657 strh r3, [r0, #0x01]
2658 str ip, [r0, #0x03]
2659 str r2, [r0, #0x07]
2660 strb r1, [r0, #0x0b]
2661 RET
2662 LMEMCPY_C_PAD
2663
/*
 * 12-byte cases 0110-1011: dst at byte offset 1 (continued) and dst
 * 16-bit aligned.  Same load/re-pack/store technique; each case fits
 * in its 128-byte LMEMCPY_C_PAD slot.
 */
2664 /*
2665 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2666 */
2667 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2668 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2669 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2670 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2671 #ifdef __ARMEB__
/* ror #8 puts byte 0 in the low lane for the strb below. */
2672 mov r2, r2, ror #8 /* r2 = 1..0 */
2673 strb r2, [r0]
2674 mov r2, r2, lsr #16 /* r2 = ..1. */
2675 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
2676 strh r2, [r0, #0x01]
2677 mov r2, r3, lsl #8 /* r2 = 345. */
2678 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
2679 mov r2, ip, lsl #8 /* r2 = 789. */
2680 orr r2, r2, r1, lsr #8 /* r2 = 789A */
2681 #else
2682 strb r2, [r0]
2683 mov r2, r2, lsr #8 /* r2 = ...1 */
2684 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2685 strh r2, [r0, #0x01]
2686 mov r2, r3, lsr #8 /* r2 = .543 */
2687 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
2688 mov r2, ip, lsr #8 /* r2 = .987 */
2689 orr r2, r2, r1, lsl #24 /* r2 = A987 */
2690 mov r1, r1, lsr #8 /* r1 = ...B */
2691 #endif
2692 str r3, [r0, #0x03]
2693 str r2, [r0, #0x07]
2694 strb r1, [r0, #0x0b]
2695 RET
2696 LMEMCPY_C_PAD
2697
2698 /*
2699 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2700 */
/* src offset 3 mod 4: r1 + 1, r1 + 5, r1 + 9 are word-aligned; "x" is junk. */
2701 ldrb r2, [r1]
2702 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2703 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2704 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2705 strb r2, [r0]
2706 #ifdef __ARMEB__
2707 mov r2, r3, lsr #16 /* r2 = ..12 */
2708 strh r2, [r0, #0x01]
2709 mov r3, r3, lsl #16 /* r3 = 34.. */
2710 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
2711 mov ip, ip, lsl #16 /* ip = 78.. */
2712 orr ip, ip, r1, lsr #16 /* ip = 789A */
2713 mov r1, r1, lsr #8 /* r1 = .9AB */
2714 #else
2715 strh r3, [r0, #0x01]
2716 mov r3, r3, lsr #16 /* r3 = ..43 */
2717 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
2718 mov ip, ip, lsr #16 /* ip = ..87 */
2719 orr ip, ip, r1, lsl #16 /* ip = A987 */
2720 mov r1, r1, lsr #16 /* r1 = ..xB */
2721 #endif
2722 str r3, [r0, #0x03]
2723 str ip, [r0, #0x07]
/* strb stores the low byte only, so the residual "x"/"9AB" lanes are harmless. */
2724 strb r1, [r0, #0x0b]
2725 RET
2726 LMEMCPY_C_PAD
2727
2728 /*
2729 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2730 */
2731 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
2732 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2733 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
2734 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2735 #ifdef __ARMEB__
2736 strh r1, [r0]
2737 mov r1, ip, lsl #16 /* r1 = 23.. */
2738 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
2739 mov r3, r3, lsl #16 /* r3 = 67.. */
2740 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
2741 #else
2742 strh ip, [r0]
2743 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
2744 mov r3, r3, lsr #16 /* r3 = ..76 */
2745 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
2746 mov r2, r2, lsr #16 /* r2 = ..BA */
2747 #endif
2748 str r1, [r0, #0x02]
2749 str r3, [r0, #0x06]
2750 strh r2, [r0, #0x0a]
2751 RET
2752 LMEMCPY_C_PAD
2753
2754 /*
2755 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2756 */
/* src offset 1 mod 4: r1 - 1, r1 + 3, r1 + 7 are word-aligned; "x" is junk. */
2757 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2758 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2759 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
2760 strh ip, [r0]
2761 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2762 ldrb r1, [r1, #0x0b] /* r1 = ...B */
2763 #ifdef __ARMEB__
2764 mov r2, r2, lsl #24 /* r2 = 2... */
2765 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
2766 mov r3, r3, lsl #24 /* r3 = 6... */
2767 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
2768 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
2769 #else
2770 mov r2, r2, lsr #24 /* r2 = ...2 */
2771 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2772 mov r3, r3, lsr #24 /* r3 = ...6 */
2773 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2774 mov r1, r1, lsl #8 /* r1 = ..B. */
2775 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2776 #endif
2777 str r2, [r0, #0x02]
2778 str r3, [r0, #0x06]
2779 strh r1, [r0, #0x0a]
2780 RET
2781 LMEMCPY_C_PAD
2782
2783 /*
2784 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2785 */
/* Matching alignments: half/word/word/half copy, no shuffling. */
2786 ldrh r2, [r1]
2787 ldr r3, [r1, #0x02]
2788 ldr ip, [r1, #0x06]
2789 ldrh r1, [r1, #0x0a]
2790 strh r2, [r0]
2791 str r3, [r0, #0x02]
2792 str ip, [r0, #0x06]
2793 strh r1, [r0, #0x0a]
2794 RET
2795 LMEMCPY_C_PAD
2796
2797 /*
2798 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2799 */
/* src offset 3 mod 4: r1 + 9, r1 + 5, r1 + 1 are word-aligned; "x" is junk. */
2800 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2801 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2802 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2803 strh ip, [r0, #0x0a]
2804 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2805 ldrb r1, [r1] /* r1 = ...0 */
2806 #ifdef __ARMEB__
2807 mov r2, r2, lsr #24 /* r2 = ...9 */
2808 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
2809 mov r3, r3, lsr #24 /* r3 = ...5 */
2810 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
2811 mov r1, r1, lsl #8 /* r1 = ..0. */
2812 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
2813 #else
2814 mov r2, r2, lsl #24 /* r2 = 9... */
2815 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2816 mov r3, r3, lsl #24 /* r3 = 5... */
2817 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2818 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
2819 #endif
2820 str r2, [r0, #0x06]
2821 str r3, [r0, #0x02]
2822 strh r1, [r0]
2823 RET
2824 LMEMCPY_C_PAD
2825
/*
 * Final 12-byte cases: dst at byte offset 3.  Each fits in its
 * 128-byte LMEMCPY_C_PAD slot; the last case needs no trailing pad.
 */
2826 /*
2827 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2828 */
2829 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2830 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2831 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2832 #ifdef __ARMEB__
2833 mov r3, r2, lsr #24 /* r3 = ...0 */
2834 strb r3, [r0]
2835 mov r2, r2, lsl #8 /* r2 = 123. */
2836 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
2837 str r2, [r0, #0x01]
2838 mov r2, ip, lsl #8 /* r2 = 567. */
2839 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
2840 str r2, [r0, #0x05]
2841 mov r2, r1, lsr #8 /* r2 = ..9A */
2842 strh r2, [r0, #0x09]
2843 strb r1, [r0, #0x0b]
2844 #else
2845 strb r2, [r0]
2846 mov r3, r2, lsr #8 /* r3 = .321 */
2847 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2848 str r3, [r0, #0x01]
2849 mov r3, ip, lsr #8 /* r3 = .765 */
2850 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2851 str r3, [r0, #0x05]
2852 mov r1, r1, lsr #8 /* r1 = .BA9 */
2853 strh r1, [r0, #0x09]
2854 mov r1, r1, lsr #16 /* r1 = ...B */
2855 strb r1, [r0, #0x0b]
2856 #endif
2857 RET
2858 LMEMCPY_C_PAD
2859
2860 /*
2861 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2862 */
/* src offset 1 mod 4: r1 + 7, r1 + 3, r1 - 1 are word-aligned; "x" is junk. */
2863 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2864 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2865 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2866 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2867 strb r2, [r0, #0x0b]
2868 #ifdef __ARMEB__
2869 strh r3, [r0, #0x09]
2870 mov r3, r3, lsr #16 /* r3 = ..78 */
2871 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
2872 mov ip, ip, lsr #16 /* ip = ..34 */
2873 orr ip, ip, r1, lsl #16 /* ip = 1234 */
2874 mov r1, r1, lsr #16 /* r1 = ..x0 */
2875 #else
2876 mov r2, r3, lsr #16 /* r2 = ..A9 */
2877 strh r2, [r0, #0x09]
2878 mov r3, r3, lsl #16 /* r3 = 87.. */
2879 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2880 mov ip, ip, lsl #16 /* ip = 43.. */
2881 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2882 mov r1, r1, lsr #8 /* r1 = .210 */
2883 #endif
2884 str r3, [r0, #0x05]
2885 str ip, [r0, #0x01]
/* strb stores only the low byte, so the residual "x" lane is discarded. */
2886 strb r1, [r0]
2887 RET
2888 LMEMCPY_C_PAD
2889
2890 /*
2891 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2892 */
/* This case keeps fully separate BE/LE sequences instead of sharing stores. */
2893 #ifdef __ARMEB__
2894 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
2895 ldr ip, [r1, #0x06] /* ip = 6789 */
2896 ldr r3, [r1, #0x02] /* r3 = 2345 */
2897 ldrh r1, [r1] /* r1 = ..01 */
2898 strb r2, [r0, #0x0b]
2899 mov r2, r2, lsr #8 /* r2 = ...A */
2900 orr r2, r2, ip, lsl #8 /* r2 = 789A */
2901 mov ip, ip, lsr #8 /* ip = .678 */
2902 orr ip, ip, r3, lsl #24 /* ip = 5678 */
2903 mov r3, r3, lsr #8 /* r3 = .234 */
2904 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
2905 mov r1, r1, lsr #8 /* r1 = ...0 */
2906 strb r1, [r0]
2907 str r3, [r0, #0x01]
2908 str ip, [r0, #0x05]
2909 strh r2, [r0, #0x09]
2910 #else
2911 ldrh r2, [r1] /* r2 = ..10 */
2912 ldr r3, [r1, #0x02] /* r3 = 5432 */
2913 ldr ip, [r1, #0x06] /* ip = 9876 */
2914 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2915 strb r2, [r0]
2916 mov r2, r2, lsr #8 /* r2 = ...1 */
2917 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2918 mov r3, r3, lsr #24 /* r3 = ...5 */
2919 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2920 mov ip, ip, lsr #24 /* ip = ...9 */
2921 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2922 mov r1, r1, lsr #8 /* r1 = ...B */
2923 str r2, [r0, #0x01]
2924 str r3, [r0, #0x05]
2925 strh ip, [r0, #0x09]
2926 strb r1, [r0, #0x0b]
2927 #endif
2928 RET
2929 LMEMCPY_C_PAD
2930
2931 /*
2932 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2933 */
/* Same phase (both offset 3 mod 4): aligned byte/word/half copies, no shuffling. */
2934 ldrb r2, [r1]
2935 ldr r3, [r1, #0x01]
2936 ldr ip, [r1, #0x05]
2937 strb r2, [r0]
2938 ldrh r2, [r1, #0x09]
2939 ldrb r1, [r1, #0x0b]
2940 str r3, [r0, #0x01]
2941 str ip, [r0, #0x05]
2942 strh r2, [r0, #0x09]
2943 strb r1, [r0, #0x0b]
2944 RET
2945 END(memcpy)
2946 #endif /* _ARM_ARCH_5E */
2947
2948 #ifdef GPROF
2949
/*
 * Dummy marker symbols for kernel profiling.  Each is a single nop so
 * the symbol has a distinct, non-zero-sized address range.
 * NOTE(review): these mirror the conventional BSD gprof marker symbols
 * (user mode, trap begin/end, interrupt begin/end) used to bucket
 * sampled time outside normal kernel functions -- confirm against the
 * kernel's profiling/clock code before relying on that description.
 */
2950 ENTRY(user)
2951 nop
2952 END(user)
2953 ENTRY(btrap)
2954 nop
2955 END(btrap)
2956 ENTRY(etrap)
2957 nop
2958 END(etrap)
2959 ENTRY(bintr)
2960 nop
2961 END(bintr)
2962 ENTRY(eintr)
2963 nop
2964 END(eintr)
2965 #endif
/* Cache object: c37d6b076d8d2940d0c934d20e4af293 */