sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <machine/asm.h>
28 #include <machine/asmacros.h>
29 __FBSDID("$FreeBSD: releng/6.1/sys/arm/arm/support.S 144967 2005-04-12 22:46:09Z cognet $");
30
31 #include "assym.s"
32
33 /*
34 * memset: Sets a block of memory to the specified value
35 *
36 * On entry:
37 * r0 - dest address
38 * r1 - byte to write
39 * r2 - number of bytes to write
40 *
41 * On exit:
42 * r0 - dest address
43 */
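As a reading aid before the assembly: the strategy below is to widen the fill byte to a full 32-bit word, byte-fill up to a word boundary, store words in bulk (128- and 32-byte blocks, quad-aligned strd pairs on XScale), and byte-fill the tail. A minimal C sketch of that shape, with an invented name, not the kernel routine itself:

    #include <stddef.h>
    #include <stdint.h>

    /* Minimal sketch of the fill strategy; the name is invented. */
    static void *
    memset_sketch(void *dst, int c, size_t len)
    {
    	unsigned char *p = dst;
    	uint32_t val = (uint32_t)c & 0xff;

    	val |= val << 8;			/* extend value to 16 bits */
    	val |= val << 16;			/* extend value to 32 bits */
    	while (len > 0 && ((uintptr_t)p & 3) != 0) {
    		*p++ = (unsigned char)c;	/* byte-fill to a word boundary */
    		len--;
    	}
    	for (; len >= 4; len -= 4, p += 4)
    		*(uint32_t *)p = val;		/* bulk word stores */
    	while (len-- > 0)
    		*p++ = (unsigned char)c;	/* 1-3 trailing bytes */
    	return (dst);
    }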
44 /* LINTSTUB: Func: void bzero(void *, size_t) */
45 ENTRY(bzero)
46 mov r3, #0x00
47 b do_memset
48
49 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
50 ENTRY(memset)
51 and r3, r1, #0xff /* We deal with bytes */
52 mov r1, r2
53 do_memset:
54 	cmp	r1, #0x04		/* Do we have less than 4 bytes? */
55 mov ip, r0
56 blt .Lmemset_lessthanfour
57
58 /* Ok first we will word align the address */
59 ands r2, ip, #0x03 /* Get the bottom two bits */
60 bne .Lmemset_wordunaligned /* The address is not word aligned */
61
62 /* We are now word aligned */
63 .Lmemset_wordaligned:
64 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
65 #ifdef __XSCALE__
66 tst ip, #0x04 /* Quad-align for Xscale */
67 #else
68 cmp r1, #0x10
69 #endif
70 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
71 #ifdef __XSCALE__
72 subne r1, r1, #0x04 /* Quad-align if necessary */
73 strne r3, [ip], #0x04
74 cmp r1, #0x10
75 #endif
76 blt .Lmemset_loop4 /* If less than 16 then use words */
77 mov r2, r3 /* Duplicate data */
78 cmp r1, #0x80 /* If < 128 then skip the big loop */
79 blt .Lmemset_loop32
80
81 /* Do 128 bytes at a time */
82 .Lmemset_loop128:
83 subs r1, r1, #0x80
84 #ifdef __XSCALE__
85 strged r2, [ip], #0x08
86 strged r2, [ip], #0x08
87 strged r2, [ip], #0x08
88 strged r2, [ip], #0x08
89 strged r2, [ip], #0x08
90 strged r2, [ip], #0x08
91 strged r2, [ip], #0x08
92 strged r2, [ip], #0x08
93 strged r2, [ip], #0x08
94 strged r2, [ip], #0x08
95 strged r2, [ip], #0x08
96 strged r2, [ip], #0x08
97 strged r2, [ip], #0x08
98 strged r2, [ip], #0x08
99 strged r2, [ip], #0x08
100 strged r2, [ip], #0x08
101 #else
102 stmgeia ip!, {r2-r3}
103 stmgeia ip!, {r2-r3}
104 stmgeia ip!, {r2-r3}
105 stmgeia ip!, {r2-r3}
106 stmgeia ip!, {r2-r3}
107 stmgeia ip!, {r2-r3}
108 stmgeia ip!, {r2-r3}
109 stmgeia ip!, {r2-r3}
110 stmgeia ip!, {r2-r3}
111 stmgeia ip!, {r2-r3}
112 stmgeia ip!, {r2-r3}
113 stmgeia ip!, {r2-r3}
114 stmgeia ip!, {r2-r3}
115 stmgeia ip!, {r2-r3}
116 stmgeia ip!, {r2-r3}
117 stmgeia ip!, {r2-r3}
118 #endif
119 bgt .Lmemset_loop128
120 RETeq /* Zero length so just exit */
121
122 add r1, r1, #0x80 /* Adjust for extra sub */
123
124 /* Do 32 bytes at a time */
125 .Lmemset_loop32:
126 subs r1, r1, #0x20
127 #ifdef __XSCALE__
128 strged r2, [ip], #0x08
129 strged r2, [ip], #0x08
130 strged r2, [ip], #0x08
131 strged r2, [ip], #0x08
132 #else
133 stmgeia ip!, {r2-r3}
134 stmgeia ip!, {r2-r3}
135 stmgeia ip!, {r2-r3}
136 stmgeia ip!, {r2-r3}
137 #endif
138 bgt .Lmemset_loop32
139 RETeq /* Zero length so just exit */
140
141 adds r1, r1, #0x10 /* Partially adjust for extra sub */
142
143 /* Deal with 16 bytes or more */
144 #ifdef __XSCALE__
145 strged r2, [ip], #0x08
146 strged r2, [ip], #0x08
147 #else
148 stmgeia ip!, {r2-r3}
149 stmgeia ip!, {r2-r3}
150 #endif
151 RETeq /* Zero length so just exit */
152
153 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
154
155 /* We have at least 4 bytes so copy as words */
156 .Lmemset_loop4:
157 subs r1, r1, #0x04
158 strge r3, [ip], #0x04
159 bgt .Lmemset_loop4
160 RETeq /* Zero length so just exit */
161
162 #ifdef __XSCALE__
163 /* Compensate for 64-bit alignment check */
164 adds r1, r1, #0x04
165 RETeq
166 cmp r1, #2
167 #else
168 cmp r1, #-2
169 #endif
170
171 strb r3, [ip], #0x01 /* Set 1 byte */
172 strgeb r3, [ip], #0x01 /* Set another byte */
173 strgtb r3, [ip] /* and a third */
174 RET /* Exit */
175
176 .Lmemset_wordunaligned:
177 rsb r2, r2, #0x004
178 strb r3, [ip], #0x01 /* Set 1 byte */
179 cmp r2, #0x02
180 strgeb r3, [ip], #0x01 /* Set another byte */
181 sub r1, r1, r2
182 strgtb r3, [ip], #0x01 /* and a third */
183 cmp r1, #0x04 /* More than 4 bytes left? */
184 bge .Lmemset_wordaligned /* Yup */
185
186 .Lmemset_lessthanfour:
187 cmp r1, #0x00
188 RETeq /* Zero length so exit */
189 strb r3, [ip], #0x01 /* Set 1 byte */
190 cmp r1, #0x02
191 strgeb r3, [ip], #0x01 /* Set another byte */
192 strgtb r3, [ip] /* and a third */
193 RET /* Exit */
194
195 ENTRY(bcmp)
196 mov ip, r0
197 cmp r2, #0x06
198 beq .Lmemcmp_6bytes
199 mov r0, #0x00
200
201 /* Are both addresses aligned the same way? */
202 cmp r2, #0x00
203 eornes r3, ip, r1
204 RETeq /* len == 0, or same addresses! */
205 tst r3, #0x03
206 subne r2, r2, #0x01
207 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
208
209 /* Word-align the addresses, if necessary */
210 sub r3, r1, #0x05
211 ands r3, r3, #0x03
212 add r3, r3, r3, lsl #1
213 addne pc, pc, r3, lsl #3
214 nop
215
216 /* Compare up to 3 bytes */
217 ldrb r0, [ip], #0x01
218 ldrb r3, [r1], #0x01
219 subs r0, r0, r3
220 RETne
221 subs r2, r2, #0x01
222 RETeq
223
224 /* Compare up to 2 bytes */
225 ldrb r0, [ip], #0x01
226 ldrb r3, [r1], #0x01
227 subs r0, r0, r3
228 RETne
229 subs r2, r2, #0x01
230 RETeq
231
232 /* Compare 1 byte */
233 ldrb r0, [ip], #0x01
234 ldrb r3, [r1], #0x01
235 subs r0, r0, r3
236 RETne
237 subs r2, r2, #0x01
238 RETeq
239
240 /* Compare 4 bytes at a time, if possible */
241 subs r2, r2, #0x04
242 bcc .Lmemcmp_bytewise
243 .Lmemcmp_word_aligned:
244 ldr r0, [ip], #0x04
245 ldr r3, [r1], #0x04
246 subs r2, r2, #0x04
247 cmpcs r0, r3
248 beq .Lmemcmp_word_aligned
249 sub r0, r0, r3
250
251 /* Correct for extra subtraction, and check if done */
252 adds r2, r2, #0x04
253 cmpeq r0, #0x00 /* If done, did all bytes match? */
254 RETeq /* Yup. Just return */
255
256 /* Re-do the final word byte-wise */
257 sub ip, ip, #0x04
258 sub r1, r1, #0x04
259
260 .Lmemcmp_bytewise:
261 add r2, r2, #0x03
262 .Lmemcmp_bytewise2:
263 ldrb r0, [ip], #0x01
264 ldrb r3, [r1], #0x01
265 subs r2, r2, #0x01
266 cmpcs r0, r3
267 beq .Lmemcmp_bytewise2
268 sub r0, r0, r3
269 RET
270
271 /*
272 * 6 byte compares are very common, thanks to the network stack.
273 * This code is hand-scheduled to reduce the number of stalls for
274 * load results. Everything else being equal, this will be ~32%
275 * faster than a byte-wise memcmp.
276 */
277 .align 5
278 .Lmemcmp_6bytes:
279 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
280 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
281 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
282 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
283 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */
284 RETne /* Return if mismatch on #0 */
285 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
286 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */
287 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */
288 RETne /* Return if mismatch on #1 */
289 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
290 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
291 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */
292 RETne /* Return if mismatch on #2 */
293 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
294 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */
295 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */
296 RETne /* Return if mismatch on #3 */
297 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
298 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
299 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */
300 RETne /* Return if mismatch on #4 */
301 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
302 RET
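For contrast, the byte-wise compare that the comment above measures against looks like this in C (a sketch; the hand-scheduled assembly wins by starting each load well before its result is subtracted):

    #include <stddef.h>

    /* Byte-wise reference compare (sketch). */
    static int
    memcmp_bytewise(const void *p1, const void *p2, size_t len)
    {
    	const unsigned char *b1 = p1, *b2 = p2;

    	while (len-- > 0) {
    		int diff = *b1++ - *b2++;

    		if (diff != 0)
    			return (diff);	/* difference at first mismatch */
    	}
    	return (0);
    }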
303
304 ENTRY(bcopy)
305 /* switch the source and destination registers */
306 eor r0, r1, r0
307 eor r1, r0, r1
308 eor r0, r1, r0
309 ENTRY(memmove)
310 /* Do the buffers overlap? */
311 cmp r0, r1
312 RETeq /* Bail now if src/dst are the same */
313 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
314 	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
315 cmp r3, r2 /* if (r3 < len) we have an overlap */
316 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
317
318 /* Determine copy direction */
319 cmp r1, r0
320 bcc .Lmemmove_backwards
321
322 moveq r0, #0 /* Quick abort for len=0 */
323 RETeq
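The intent of the dispatch above is the standard memmove() contract: hand non-overlapping buffers to memcpy(), copy forward when the destination lies below the source, and backward otherwise. A self-contained C sketch of that rule (all names are invented for illustration, not kernel interfaces):

    #include <stddef.h>
    #include <stdint.h>

    static void
    copy_forwards(unsigned char *d, const unsigned char *s, size_t len)
    {
    	while (len-- > 0)
    		*d++ = *s++;			/* low to high */
    }

    static void
    copy_backwards(unsigned char *d, const unsigned char *s, size_t len)
    {
    	d += len;
    	s += len;
    	while (len-- > 0)
    		*--d = *--s;			/* high to low */
    }

    /* Sketch of the overlap dispatch, not the kernel routine itself. */
    static void *
    memmove_sketch(void *dst, const void *src, size_t len)
    {
    	uintptr_t d = (uintptr_t)dst, s = (uintptr_t)src;

    	if (d == s || len == 0)
    		return (dst);
    	/* A distance of at least len means the buffers cannot overlap. */
    	if ((d > s ? d - s : s - d) >= len || d < s)
    		copy_forwards(dst, src, len);	/* reads stay ahead of writes */
    	else
    		copy_backwards(dst, src, len);	/* reads stay behind writes */
    	return (dst);
    }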
324
325 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
326 subs r2, r2, #4
327 blt .Lmemmove_fl4 /* less than 4 bytes */
328 ands r12, r0, #3
329 bne .Lmemmove_fdestul /* oh unaligned destination addr */
330 ands r12, r1, #3
331 bne .Lmemmove_fsrcul /* oh unaligned source addr */
332
333 .Lmemmove_ft8:
334 /* We have aligned source and destination */
335 subs r2, r2, #8
336 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
337 subs r2, r2, #0x14
338 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
339 stmdb sp!, {r4} /* borrow r4 */
340
341 /* blat 32 bytes at a time */
342 /* XXX for really big copies perhaps we should use more registers */
343 .Lmemmove_floop32:
344 ldmia r1!, {r3, r4, r12, lr}
345 stmia r0!, {r3, r4, r12, lr}
346 ldmia r1!, {r3, r4, r12, lr}
347 stmia r0!, {r3, r4, r12, lr}
348 subs r2, r2, #0x20
349 bge .Lmemmove_floop32
350
351 cmn r2, #0x10
352 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
353 stmgeia r0!, {r3, r4, r12, lr}
354 subge r2, r2, #0x10
355 ldmia sp!, {r4} /* return r4 */
356
357 .Lmemmove_fl32:
358 adds r2, r2, #0x14
359
360 /* blat 12 bytes at a time */
361 .Lmemmove_floop12:
362 ldmgeia r1!, {r3, r12, lr}
363 stmgeia r0!, {r3, r12, lr}
364 subges r2, r2, #0x0c
365 bge .Lmemmove_floop12
366
367 .Lmemmove_fl12:
368 adds r2, r2, #8
369 blt .Lmemmove_fl4
370
371 subs r2, r2, #4
372 ldrlt r3, [r1], #4
373 strlt r3, [r0], #4
374 ldmgeia r1!, {r3, r12}
375 stmgeia r0!, {r3, r12}
376 subge r2, r2, #4
377
378 .Lmemmove_fl4:
379 /* less than 4 bytes to go */
380 adds r2, r2, #4
381 ldmeqia sp!, {r0, pc} /* done */
382
383 /* copy the crud byte at a time */
384 cmp r2, #2
385 ldrb r3, [r1], #1
386 strb r3, [r0], #1
387 ldrgeb r3, [r1], #1
388 strgeb r3, [r0], #1
389 ldrgtb r3, [r1], #1
390 strgtb r3, [r0], #1
391 ldmia sp!, {r0, pc}
392
393 /* erg - unaligned destination */
394 .Lmemmove_fdestul:
395 rsb r12, r12, #4
396 cmp r12, #2
397
398 /* align destination with byte copies */
399 ldrb r3, [r1], #1
400 strb r3, [r0], #1
401 ldrgeb r3, [r1], #1
402 strgeb r3, [r0], #1
403 ldrgtb r3, [r1], #1
404 strgtb r3, [r0], #1
405 subs r2, r2, r12
406 	blt	.Lmemmove_fl4		/* less than 4 bytes */
407
408 ands r12, r1, #3
409 beq .Lmemmove_ft8 /* we have an aligned source */
410
411 /* erg - unaligned source */
412 /* This is where it gets nasty ... */
413 .Lmemmove_fsrcul:
414 bic r1, r1, #3
415 ldr lr, [r1], #4
416 cmp r12, #2
417 bgt .Lmemmove_fsrcul3
418 beq .Lmemmove_fsrcul2
419 cmp r2, #0x0c
420 blt .Lmemmove_fsrcul1loop4
421 sub r2, r2, #0x0c
422 stmdb sp!, {r4, r5}
423
424 .Lmemmove_fsrcul1loop16:
425 #ifdef __ARMEB__
426 mov r3, lr, lsl #8
427 #else
428 mov r3, lr, lsr #8
429 #endif
430 ldmia r1!, {r4, r5, r12, lr}
431 #ifdef __ARMEB__
432 orr r3, r3, r4, lsr #24
433 mov r4, r4, lsl #8
434 orr r4, r4, r5, lsr #24
435 mov r5, r5, lsl #8
436 orr r5, r5, r12, lsr #24
437 mov r12, r12, lsl #8
438 orr r12, r12, lr, lsr #24
439 #else
440 orr r3, r3, r4, lsl #24
441 mov r4, r4, lsr #8
442 orr r4, r4, r5, lsl #24
443 mov r5, r5, lsr #8
444 orr r5, r5, r12, lsl #24
445 mov r12, r12, lsr #8
446 orr r12, r12, lr, lsl #24
447 #endif
448 stmia r0!, {r3-r5, r12}
449 subs r2, r2, #0x10
450 bge .Lmemmove_fsrcul1loop16
451 ldmia sp!, {r4, r5}
452 adds r2, r2, #0x0c
453 blt .Lmemmove_fsrcul1l4
454
455 .Lmemmove_fsrcul1loop4:
456 #ifdef __ARMEB__
457 mov r12, lr, lsl #8
458 #else
459 mov r12, lr, lsr #8
460 #endif
461 ldr lr, [r1], #4
462 #ifdef __ARMEB__
463 orr r12, r12, lr, lsr #24
464 #else
465 orr r12, r12, lr, lsl #24
466 #endif
467 str r12, [r0], #4
468 subs r2, r2, #4
469 bge .Lmemmove_fsrcul1loop4
470
471 .Lmemmove_fsrcul1l4:
472 sub r1, r1, #3
473 b .Lmemmove_fl4
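All of the "nasty" unaligned-source paths share one trick: round the source down to a word boundary, then build each output word from the top of one aligned word and the bottom of the next, with shift counts set by the misalignment (8, 16 or 24 bits). A little-endian C sketch of the one-byte-offset case just handled above (the name is invented; the big-endian variants swap the shift directions):

    #include <stddef.h>
    #include <stdint.h>

    /*
     * Sketch: copy len bytes (a multiple of 4) to word-aligned dst from
     * src == word boundary + 1, little-endian.
     */
    static void
    copy_src_off1_sketch(uint32_t *dst, const unsigned char *src, size_t len)
    {
    	const uint32_t *s = (const uint32_t *)((uintptr_t)src & ~(uintptr_t)3);
    	uint32_t lo, hi;

    	lo = *s++;				/* bits 8-31 hold src[0..2] */
    	for (; len >= 4; len -= 4) {
    		hi = *s++;
    		*dst++ = (lo >> 8) | (hi << 24);	/* splice two words */
    		lo = hi;
    	}
    }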
474
475 .Lmemmove_fsrcul2:
476 cmp r2, #0x0c
477 blt .Lmemmove_fsrcul2loop4
478 sub r2, r2, #0x0c
479 stmdb sp!, {r4, r5}
480
481 .Lmemmove_fsrcul2loop16:
482 #ifdef __ARMEB__
483 mov r3, lr, lsl #16
484 #else
485 mov r3, lr, lsr #16
486 #endif
487 ldmia r1!, {r4, r5, r12, lr}
488 #ifdef __ARMEB__
489 orr r3, r3, r4, lsr #16
490 mov r4, r4, lsl #16
491 orr r4, r4, r5, lsr #16
492 mov r5, r5, lsl #16
493 orr r5, r5, r12, lsr #16
494 mov r12, r12, lsl #16
495 orr r12, r12, lr, lsr #16
496 #else
497 orr r3, r3, r4, lsl #16
498 mov r4, r4, lsr #16
499 orr r4, r4, r5, lsl #16
500 mov r5, r5, lsr #16
501 orr r5, r5, r12, lsl #16
502 mov r12, r12, lsr #16
503 orr r12, r12, lr, lsl #16
504 #endif
505 stmia r0!, {r3-r5, r12}
506 subs r2, r2, #0x10
507 bge .Lmemmove_fsrcul2loop16
508 ldmia sp!, {r4, r5}
509 adds r2, r2, #0x0c
510 blt .Lmemmove_fsrcul2l4
511
512 .Lmemmove_fsrcul2loop4:
513 #ifdef __ARMEB__
514 mov r12, lr, lsl #16
515 #else
516 mov r12, lr, lsr #16
517 #endif
518 ldr lr, [r1], #4
519 #ifdef __ARMEB__
520 orr r12, r12, lr, lsr #16
521 #else
522 orr r12, r12, lr, lsl #16
523 #endif
524 str r12, [r0], #4
525 subs r2, r2, #4
526 bge .Lmemmove_fsrcul2loop4
527
528 .Lmemmove_fsrcul2l4:
529 sub r1, r1, #2
530 b .Lmemmove_fl4
531
532 .Lmemmove_fsrcul3:
533 cmp r2, #0x0c
534 blt .Lmemmove_fsrcul3loop4
535 sub r2, r2, #0x0c
536 stmdb sp!, {r4, r5}
537
538 .Lmemmove_fsrcul3loop16:
539 #ifdef __ARMEB__
540 mov r3, lr, lsl #24
541 #else
542 mov r3, lr, lsr #24
543 #endif
544 ldmia r1!, {r4, r5, r12, lr}
545 #ifdef __ARMEB__
546 orr r3, r3, r4, lsr #8
547 mov r4, r4, lsl #24
548 orr r4, r4, r5, lsr #8
549 mov r5, r5, lsl #24
550 orr r5, r5, r12, lsr #8
551 mov r12, r12, lsl #24
552 orr r12, r12, lr, lsr #8
553 #else
554 orr r3, r3, r4, lsl #8
555 mov r4, r4, lsr #24
556 orr r4, r4, r5, lsl #8
557 mov r5, r5, lsr #24
558 orr r5, r5, r12, lsl #8
559 mov r12, r12, lsr #24
560 orr r12, r12, lr, lsl #8
561 #endif
562 stmia r0!, {r3-r5, r12}
563 subs r2, r2, #0x10
564 bge .Lmemmove_fsrcul3loop16
565 ldmia sp!, {r4, r5}
566 adds r2, r2, #0x0c
567 blt .Lmemmove_fsrcul3l4
568
569 .Lmemmove_fsrcul3loop4:
570 #ifdef __ARMEB__
571 mov r12, lr, lsl #24
572 #else
573 mov r12, lr, lsr #24
574 #endif
575 ldr lr, [r1], #4
576 #ifdef __ARMEB__
577 orr r12, r12, lr, lsr #8
578 #else
579 orr r12, r12, lr, lsl #8
580 #endif
581 str r12, [r0], #4
582 subs r2, r2, #4
583 bge .Lmemmove_fsrcul3loop4
584
585 .Lmemmove_fsrcul3l4:
586 sub r1, r1, #1
587 b .Lmemmove_fl4
588
589 .Lmemmove_backwards:
590 add r1, r1, r2
591 add r0, r0, r2
592 subs r2, r2, #4
593 blt .Lmemmove_bl4 /* less than 4 bytes */
594 ands r12, r0, #3
595 bne .Lmemmove_bdestul /* oh unaligned destination addr */
596 ands r12, r1, #3
597 bne .Lmemmove_bsrcul /* oh unaligned source addr */
598
599 .Lmemmove_bt8:
600 /* We have aligned source and destination */
601 subs r2, r2, #8
602 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
603 stmdb sp!, {r4, lr}
604 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
605 blt .Lmemmove_bl32
606
607 /* blat 32 bytes at a time */
608 /* XXX for really big copies perhaps we should use more registers */
609 .Lmemmove_bloop32:
610 ldmdb r1!, {r3, r4, r12, lr}
611 stmdb r0!, {r3, r4, r12, lr}
612 ldmdb r1!, {r3, r4, r12, lr}
613 stmdb r0!, {r3, r4, r12, lr}
614 subs r2, r2, #0x20
615 bge .Lmemmove_bloop32
616
617 .Lmemmove_bl32:
618 cmn r2, #0x10
619 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
620 stmgedb r0!, {r3, r4, r12, lr}
621 subge r2, r2, #0x10
622 adds r2, r2, #0x14
623 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
624 stmgedb r0!, {r3, r12, lr}
625 subge r2, r2, #0x0c
626 ldmia sp!, {r4, lr}
627
628 .Lmemmove_bl12:
629 adds r2, r2, #8
630 blt .Lmemmove_bl4
631 subs r2, r2, #4
632 ldrlt r3, [r1, #-4]!
633 strlt r3, [r0, #-4]!
634 ldmgedb r1!, {r3, r12}
635 stmgedb r0!, {r3, r12}
636 subge r2, r2, #4
637
638 .Lmemmove_bl4:
639 /* less than 4 bytes to go */
640 adds r2, r2, #4
641 RETeq /* done */
642
643 /* copy the crud byte at a time */
644 cmp r2, #2
645 ldrb r3, [r1, #-1]!
646 strb r3, [r0, #-1]!
647 ldrgeb r3, [r1, #-1]!
648 strgeb r3, [r0, #-1]!
649 ldrgtb r3, [r1, #-1]!
650 strgtb r3, [r0, #-1]!
651 RET
652
653 /* erg - unaligned destination */
654 .Lmemmove_bdestul:
655 cmp r12, #2
656
657 /* align destination with byte copies */
658 ldrb r3, [r1, #-1]!
659 strb r3, [r0, #-1]!
660 ldrgeb r3, [r1, #-1]!
661 strgeb r3, [r0, #-1]!
662 ldrgtb r3, [r1, #-1]!
663 strgtb r3, [r0, #-1]!
664 subs r2, r2, r12
665 blt .Lmemmove_bl4 /* less than 4 bytes to go */
666 ands r12, r1, #3
667 beq .Lmemmove_bt8 /* we have an aligned source */
668
669 /* erg - unaligned source */
670 /* This is where it gets nasty ... */
671 .Lmemmove_bsrcul:
672 bic r1, r1, #3
673 ldr r3, [r1, #0]
674 cmp r12, #2
675 blt .Lmemmove_bsrcul1
676 beq .Lmemmove_bsrcul2
677 cmp r2, #0x0c
678 blt .Lmemmove_bsrcul3loop4
679 sub r2, r2, #0x0c
680 stmdb sp!, {r4, r5, lr}
681
682 .Lmemmove_bsrcul3loop16:
683 #ifdef __ARMEB__
684 mov lr, r3, lsr #8
685 #else
686 mov lr, r3, lsl #8
687 #endif
688 ldmdb r1!, {r3-r5, r12}
689 #ifdef __ARMEB__
690 orr lr, lr, r12, lsl #24
691 mov r12, r12, lsr #8
692 orr r12, r12, r5, lsl #24
693 mov r5, r5, lsr #8
694 orr r5, r5, r4, lsl #24
695 mov r4, r4, lsr #8
696 orr r4, r4, r3, lsl #24
697 #else
698 orr lr, lr, r12, lsr #24
699 mov r12, r12, lsl #8
700 orr r12, r12, r5, lsr #24
701 mov r5, r5, lsl #8
702 orr r5, r5, r4, lsr #24
703 mov r4, r4, lsl #8
704 orr r4, r4, r3, lsr #24
705 #endif
706 stmdb r0!, {r4, r5, r12, lr}
707 subs r2, r2, #0x10
708 bge .Lmemmove_bsrcul3loop16
709 ldmia sp!, {r4, r5, lr}
710 adds r2, r2, #0x0c
711 blt .Lmemmove_bsrcul3l4
712
713 .Lmemmove_bsrcul3loop4:
714 #ifdef __ARMEB__
715 mov r12, r3, lsr #8
716 #else
717 mov r12, r3, lsl #8
718 #endif
719 ldr r3, [r1, #-4]!
720 #ifdef __ARMEB__
721 orr r12, r12, r3, lsl #24
722 #else
723 orr r12, r12, r3, lsr #24
724 #endif
725 str r12, [r0, #-4]!
726 subs r2, r2, #4
727 bge .Lmemmove_bsrcul3loop4
728
729 .Lmemmove_bsrcul3l4:
730 add r1, r1, #3
731 b .Lmemmove_bl4
732
733 .Lmemmove_bsrcul2:
734 cmp r2, #0x0c
735 blt .Lmemmove_bsrcul2loop4
736 sub r2, r2, #0x0c
737 stmdb sp!, {r4, r5, lr}
738
739 .Lmemmove_bsrcul2loop16:
740 #ifdef __ARMEB__
741 mov lr, r3, lsr #16
742 #else
743 mov lr, r3, lsl #16
744 #endif
745 ldmdb r1!, {r3-r5, r12}
746 #ifdef __ARMEB__
747 orr lr, lr, r12, lsl #16
748 mov r12, r12, lsr #16
749 orr r12, r12, r5, lsl #16
750 mov r5, r5, lsr #16
751 orr r5, r5, r4, lsl #16
752 mov r4, r4, lsr #16
753 orr r4, r4, r3, lsl #16
754 #else
755 orr lr, lr, r12, lsr #16
756 mov r12, r12, lsl #16
757 orr r12, r12, r5, lsr #16
758 mov r5, r5, lsl #16
759 orr r5, r5, r4, lsr #16
760 mov r4, r4, lsl #16
761 orr r4, r4, r3, lsr #16
762 #endif
763 stmdb r0!, {r4, r5, r12, lr}
764 subs r2, r2, #0x10
765 bge .Lmemmove_bsrcul2loop16
766 ldmia sp!, {r4, r5, lr}
767 adds r2, r2, #0x0c
768 blt .Lmemmove_bsrcul2l4
769
770 .Lmemmove_bsrcul2loop4:
771 #ifdef __ARMEB__
772 mov r12, r3, lsr #16
773 #else
774 mov r12, r3, lsl #16
775 #endif
776 ldr r3, [r1, #-4]!
777 #ifdef __ARMEB__
778 orr r12, r12, r3, lsl #16
779 #else
780 orr r12, r12, r3, lsr #16
781 #endif
782 str r12, [r0, #-4]!
783 subs r2, r2, #4
784 bge .Lmemmove_bsrcul2loop4
785
786 .Lmemmove_bsrcul2l4:
787 add r1, r1, #2
788 b .Lmemmove_bl4
789
790 .Lmemmove_bsrcul1:
791 cmp r2, #0x0c
792 blt .Lmemmove_bsrcul1loop4
793 sub r2, r2, #0x0c
794 stmdb sp!, {r4, r5, lr}
795
796 .Lmemmove_bsrcul1loop32:
797 #ifdef __ARMEB__
798 mov lr, r3, lsr #24
799 #else
800 mov lr, r3, lsl #24
801 #endif
802 ldmdb r1!, {r3-r5, r12}
803 #ifdef __ARMEB__
804 orr lr, lr, r12, lsl #8
805 mov r12, r12, lsr #24
806 orr r12, r12, r5, lsl #8
807 mov r5, r5, lsr #24
808 orr r5, r5, r4, lsl #8
809 mov r4, r4, lsr #24
810 orr r4, r4, r3, lsl #8
811 #else
812 orr lr, lr, r12, lsr #8
813 mov r12, r12, lsl #24
814 orr r12, r12, r5, lsr #8
815 mov r5, r5, lsl #24
816 orr r5, r5, r4, lsr #8
817 mov r4, r4, lsl #24
818 orr r4, r4, r3, lsr #8
819 #endif
820 stmdb r0!, {r4, r5, r12, lr}
821 subs r2, r2, #0x10
822 bge .Lmemmove_bsrcul1loop32
823 ldmia sp!, {r4, r5, lr}
824 adds r2, r2, #0x0c
825 blt .Lmemmove_bsrcul1l4
826
827 .Lmemmove_bsrcul1loop4:
828 #ifdef __ARMEB__
829 mov r12, r3, lsr #24
830 #else
831 mov r12, r3, lsl #24
832 #endif
833 ldr r3, [r1, #-4]!
834 #ifdef __ARMEB__
835 orr r12, r12, r3, lsl #8
836 #else
837 orr r12, r12, r3, lsr #8
838 #endif
839 str r12, [r0, #-4]!
840 subs r2, r2, #4
841 bge .Lmemmove_bsrcul1loop4
842
843 .Lmemmove_bsrcul1l4:
844 add r1, r1, #1
845 b .Lmemmove_bl4
846
847 #if !defined(__XSCALE__)
848 ENTRY(memcpy)
849 /* save leaf functions having to store this away */
850 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
851
852 subs r2, r2, #4
853 blt .Lmemcpy_l4 /* less than 4 bytes */
854 ands r12, r0, #3
855 bne .Lmemcpy_destul /* oh unaligned destination addr */
856 ands r12, r1, #3
857 bne .Lmemcpy_srcul /* oh unaligned source addr */
858
859 .Lmemcpy_t8:
860 /* We have aligned source and destination */
861 subs r2, r2, #8
862 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
863 subs r2, r2, #0x14
864 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
865 stmdb sp!, {r4} /* borrow r4 */
866
867 /* blat 32 bytes at a time */
868 /* XXX for really big copies perhaps we should use more registers */
869 .Lmemcpy_loop32:
870 ldmia r1!, {r3, r4, r12, lr}
871 stmia r0!, {r3, r4, r12, lr}
872 ldmia r1!, {r3, r4, r12, lr}
873 stmia r0!, {r3, r4, r12, lr}
874 subs r2, r2, #0x20
875 bge .Lmemcpy_loop32
876
877 cmn r2, #0x10
878 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
879 stmgeia r0!, {r3, r4, r12, lr}
880 subge r2, r2, #0x10
881 ldmia sp!, {r4} /* return r4 */
882
883 .Lmemcpy_l32:
884 adds r2, r2, #0x14
885
886 /* blat 12 bytes at a time */
887 .Lmemcpy_loop12:
888 ldmgeia r1!, {r3, r12, lr}
889 stmgeia r0!, {r3, r12, lr}
890 subges r2, r2, #0x0c
891 bge .Lmemcpy_loop12
892
893 .Lmemcpy_l12:
894 adds r2, r2, #8
895 blt .Lmemcpy_l4
896
897 subs r2, r2, #4
898 ldrlt r3, [r1], #4
899 strlt r3, [r0], #4
900 ldmgeia r1!, {r3, r12}
901 stmgeia r0!, {r3, r12}
902 subge r2, r2, #4
903
904 .Lmemcpy_l4:
905 /* less than 4 bytes to go */
906 adds r2, r2, #4
907 #ifdef __APCS_26__
908 ldmeqia sp!, {r0, pc}^ /* done */
909 #else
910 ldmeqia sp!, {r0, pc} /* done */
911 #endif
912 /* copy the crud byte at a time */
913 cmp r2, #2
914 ldrb r3, [r1], #1
915 strb r3, [r0], #1
916 ldrgeb r3, [r1], #1
917 strgeb r3, [r0], #1
918 ldrgtb r3, [r1], #1
919 strgtb r3, [r0], #1
920 ldmia sp!, {r0, pc}
921
922 /* erg - unaligned destination */
923 .Lmemcpy_destul:
924 rsb r12, r12, #4
925 cmp r12, #2
926
927 /* align destination with byte copies */
928 ldrb r3, [r1], #1
929 strb r3, [r0], #1
930 ldrgeb r3, [r1], #1
931 strgeb r3, [r0], #1
932 ldrgtb r3, [r1], #1
933 strgtb r3, [r0], #1
934 subs r2, r2, r12
935 	blt	.Lmemcpy_l4		/* less than 4 bytes */
936
937 ands r12, r1, #3
938 beq .Lmemcpy_t8 /* we have an aligned source */
939
940 /* erg - unaligned source */
941 /* This is where it gets nasty ... */
942 .Lmemcpy_srcul:
943 bic r1, r1, #3
944 ldr lr, [r1], #4
945 cmp r12, #2
946 bgt .Lmemcpy_srcul3
947 beq .Lmemcpy_srcul2
948 cmp r2, #0x0c
949 blt .Lmemcpy_srcul1loop4
950 sub r2, r2, #0x0c
951 stmdb sp!, {r4, r5}
952
953 .Lmemcpy_srcul1loop16:
954 mov r3, lr, lsr #8
955 ldmia r1!, {r4, r5, r12, lr}
956 orr r3, r3, r4, lsl #24
957 mov r4, r4, lsr #8
958 orr r4, r4, r5, lsl #24
959 mov r5, r5, lsr #8
960 orr r5, r5, r12, lsl #24
961 mov r12, r12, lsr #8
962 orr r12, r12, lr, lsl #24
963 stmia r0!, {r3-r5, r12}
964 subs r2, r2, #0x10
965 bge .Lmemcpy_srcul1loop16
966 ldmia sp!, {r4, r5}
967 adds r2, r2, #0x0c
968 blt .Lmemcpy_srcul1l4
969
970 .Lmemcpy_srcul1loop4:
971 mov r12, lr, lsr #8
972 ldr lr, [r1], #4
973 orr r12, r12, lr, lsl #24
974 str r12, [r0], #4
975 subs r2, r2, #4
976 bge .Lmemcpy_srcul1loop4
977
978 .Lmemcpy_srcul1l4:
979 sub r1, r1, #3
980 b .Lmemcpy_l4
981
982 .Lmemcpy_srcul2:
983 cmp r2, #0x0c
984 blt .Lmemcpy_srcul2loop4
985 sub r2, r2, #0x0c
986 stmdb sp!, {r4, r5}
987
988 .Lmemcpy_srcul2loop16:
989 mov r3, lr, lsr #16
990 ldmia r1!, {r4, r5, r12, lr}
991 orr r3, r3, r4, lsl #16
992 mov r4, r4, lsr #16
993 orr r4, r4, r5, lsl #16
994 mov r5, r5, lsr #16
995 orr r5, r5, r12, lsl #16
996 mov r12, r12, lsr #16
997 orr r12, r12, lr, lsl #16
998 stmia r0!, {r3-r5, r12}
999 subs r2, r2, #0x10
1000 bge .Lmemcpy_srcul2loop16
1001 ldmia sp!, {r4, r5}
1002 adds r2, r2, #0x0c
1003 blt .Lmemcpy_srcul2l4
1004
1005 .Lmemcpy_srcul2loop4:
1006 mov r12, lr, lsr #16
1007 ldr lr, [r1], #4
1008 orr r12, r12, lr, lsl #16
1009 str r12, [r0], #4
1010 subs r2, r2, #4
1011 bge .Lmemcpy_srcul2loop4
1012
1013 .Lmemcpy_srcul2l4:
1014 sub r1, r1, #2
1015 b .Lmemcpy_l4
1016
1017 .Lmemcpy_srcul3:
1018 cmp r2, #0x0c
1019 blt .Lmemcpy_srcul3loop4
1020 sub r2, r2, #0x0c
1021 stmdb sp!, {r4, r5}
1022
1023 .Lmemcpy_srcul3loop16:
1024 mov r3, lr, lsr #24
1025 ldmia r1!, {r4, r5, r12, lr}
1026 orr r3, r3, r4, lsl #8
1027 mov r4, r4, lsr #24
1028 orr r4, r4, r5, lsl #8
1029 mov r5, r5, lsr #24
1030 orr r5, r5, r12, lsl #8
1031 mov r12, r12, lsr #24
1032 orr r12, r12, lr, lsl #8
1033 stmia r0!, {r3-r5, r12}
1034 subs r2, r2, #0x10
1035 bge .Lmemcpy_srcul3loop16
1036 ldmia sp!, {r4, r5}
1037 adds r2, r2, #0x0c
1038 blt .Lmemcpy_srcul3l4
1039
1040 .Lmemcpy_srcul3loop4:
1041 mov r12, lr, lsr #24
1042 ldr lr, [r1], #4
1043 orr r12, r12, lr, lsl #8
1044 str r12, [r0], #4
1045 subs r2, r2, #4
1046 bge .Lmemcpy_srcul3loop4
1047
1048 .Lmemcpy_srcul3l4:
1049 sub r1, r1, #1
1050 b .Lmemcpy_l4
1051 #else
1052 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1053 ENTRY(memcpy)
1054 pld [r1]
1055 cmp r2, #0x0c
1056 ble .Lmemcpy_short /* <= 12 bytes */
1057 mov r3, r0 /* We must not clobber r0 */
1058
1059 /* Word-align the destination buffer */
1060 ands ip, r3, #0x03 /* Already word aligned? */
1061 beq .Lmemcpy_wordaligned /* Yup */
1062 cmp ip, #0x02
1063 ldrb ip, [r1], #0x01
1064 sub r2, r2, #0x01
1065 strb ip, [r3], #0x01
1066 ldrleb ip, [r1], #0x01
1067 suble r2, r2, #0x01
1068 strleb ip, [r3], #0x01
1069 ldrltb ip, [r1], #0x01
1070 sublt r2, r2, #0x01
1071 strltb ip, [r3], #0x01
1072
1073 /* Destination buffer is now word aligned */
1074 .Lmemcpy_wordaligned:
1075 ands ip, r1, #0x03 /* Is src also word-aligned? */
1076 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1077
1078 /* Quad-align the destination buffer */
1079 tst r3, #0x07 /* Already quad aligned? */
1080 ldrne ip, [r1], #0x04
1081 stmfd sp!, {r4-r9} /* Free up some registers */
1082 subne r2, r2, #0x04
1083 strne ip, [r3], #0x04
1084
1085 /* Destination buffer quad aligned, source is at least word aligned */
1086 subs r2, r2, #0x80
1087 blt .Lmemcpy_w_lessthan128
1088
1089 /* Copy 128 bytes at a time */
1090 .Lmemcpy_w_loop128:
1091 ldr r4, [r1], #0x04 /* LD:00-03 */
1092 ldr r5, [r1], #0x04 /* LD:04-07 */
1093 pld [r1, #0x18] /* Prefetch 0x20 */
1094 ldr r6, [r1], #0x04 /* LD:08-0b */
1095 ldr r7, [r1], #0x04 /* LD:0c-0f */
1096 ldr r8, [r1], #0x04 /* LD:10-13 */
1097 ldr r9, [r1], #0x04 /* LD:14-17 */
1098 strd r4, [r3], #0x08 /* ST:00-07 */
1099 ldr r4, [r1], #0x04 /* LD:18-1b */
1100 ldr r5, [r1], #0x04 /* LD:1c-1f */
1101 strd r6, [r3], #0x08 /* ST:08-0f */
1102 ldr r6, [r1], #0x04 /* LD:20-23 */
1103 ldr r7, [r1], #0x04 /* LD:24-27 */
1104 pld [r1, #0x18] /* Prefetch 0x40 */
1105 strd r8, [r3], #0x08 /* ST:10-17 */
1106 ldr r8, [r1], #0x04 /* LD:28-2b */
1107 ldr r9, [r1], #0x04 /* LD:2c-2f */
1108 strd r4, [r3], #0x08 /* ST:18-1f */
1109 ldr r4, [r1], #0x04 /* LD:30-33 */
1110 ldr r5, [r1], #0x04 /* LD:34-37 */
1111 strd r6, [r3], #0x08 /* ST:20-27 */
1112 ldr r6, [r1], #0x04 /* LD:38-3b */
1113 ldr r7, [r1], #0x04 /* LD:3c-3f */
1114 strd r8, [r3], #0x08 /* ST:28-2f */
1115 ldr r8, [r1], #0x04 /* LD:40-43 */
1116 ldr r9, [r1], #0x04 /* LD:44-47 */
1117 pld [r1, #0x18] /* Prefetch 0x60 */
1118 strd r4, [r3], #0x08 /* ST:30-37 */
1119 ldr r4, [r1], #0x04 /* LD:48-4b */
1120 ldr r5, [r1], #0x04 /* LD:4c-4f */
1121 strd r6, [r3], #0x08 /* ST:38-3f */
1122 ldr r6, [r1], #0x04 /* LD:50-53 */
1123 ldr r7, [r1], #0x04 /* LD:54-57 */
1124 strd r8, [r3], #0x08 /* ST:40-47 */
1125 ldr r8, [r1], #0x04 /* LD:58-5b */
1126 ldr r9, [r1], #0x04 /* LD:5c-5f */
1127 strd r4, [r3], #0x08 /* ST:48-4f */
1128 ldr r4, [r1], #0x04 /* LD:60-63 */
1129 ldr r5, [r1], #0x04 /* LD:64-67 */
1130 pld [r1, #0x18] /* Prefetch 0x80 */
1131 strd r6, [r3], #0x08 /* ST:50-57 */
1132 ldr r6, [r1], #0x04 /* LD:68-6b */
1133 ldr r7, [r1], #0x04 /* LD:6c-6f */
1134 strd r8, [r3], #0x08 /* ST:58-5f */
1135 ldr r8, [r1], #0x04 /* LD:70-73 */
1136 ldr r9, [r1], #0x04 /* LD:74-77 */
1137 strd r4, [r3], #0x08 /* ST:60-67 */
1138 ldr r4, [r1], #0x04 /* LD:78-7b */
1139 ldr r5, [r1], #0x04 /* LD:7c-7f */
1140 strd r6, [r3], #0x08 /* ST:68-6f */
1141 strd r8, [r3], #0x08 /* ST:70-77 */
1142 subs r2, r2, #0x80
1143 strd r4, [r3], #0x08 /* ST:78-7f */
1144 bge .Lmemcpy_w_loop128
1145
1146 .Lmemcpy_w_lessthan128:
1147 adds r2, r2, #0x80 /* Adjust for extra sub */
1148 ldmeqfd sp!, {r4-r9}
1149 RETeq /* Return now if done */
1150 subs r2, r2, #0x20
1151 blt .Lmemcpy_w_lessthan32
1152
1153 /* Copy 32 bytes at a time */
1154 .Lmemcpy_w_loop32:
1155 ldr r4, [r1], #0x04
1156 ldr r5, [r1], #0x04
1157 pld [r1, #0x18]
1158 ldr r6, [r1], #0x04
1159 ldr r7, [r1], #0x04
1160 ldr r8, [r1], #0x04
1161 ldr r9, [r1], #0x04
1162 strd r4, [r3], #0x08
1163 ldr r4, [r1], #0x04
1164 ldr r5, [r1], #0x04
1165 strd r6, [r3], #0x08
1166 strd r8, [r3], #0x08
1167 subs r2, r2, #0x20
1168 strd r4, [r3], #0x08
1169 bge .Lmemcpy_w_loop32
1170
1171 .Lmemcpy_w_lessthan32:
1172 adds r2, r2, #0x20 /* Adjust for extra sub */
1173 ldmeqfd sp!, {r4-r9}
1174 RETeq /* Return now if done */
1175
1176 and r4, r2, #0x18
1177 rsbs r4, r4, #0x18
1178 addne pc, pc, r4, lsl #1
1179 nop
1180
1181 /* At least 24 bytes remaining */
1182 ldr r4, [r1], #0x04
1183 ldr r5, [r1], #0x04
1184 sub r2, r2, #0x08
1185 strd r4, [r3], #0x08
1186
1187 /* At least 16 bytes remaining */
1188 ldr r4, [r1], #0x04
1189 ldr r5, [r1], #0x04
1190 sub r2, r2, #0x08
1191 strd r4, [r3], #0x08
1192
1193 /* At least 8 bytes remaining */
1194 ldr r4, [r1], #0x04
1195 ldr r5, [r1], #0x04
1196 subs r2, r2, #0x08
1197 strd r4, [r3], #0x08
1198
1199 /* Less than 8 bytes remaining */
1200 ldmfd sp!, {r4-r9}
1201 RETeq /* Return now if done */
1202 subs r2, r2, #0x04
1203 ldrge ip, [r1], #0x04
1204 strge ip, [r3], #0x04
1205 RETeq /* Return now if done */
1206 addlt r2, r2, #0x04
1207 ldrb ip, [r1], #0x01
1208 cmp r2, #0x02
1209 ldrgeb r2, [r1], #0x01
1210 strb ip, [r3], #0x01
1211 ldrgtb ip, [r1]
1212 strgeb r2, [r3], #0x01
1213 strgtb ip, [r3]
1214 RET
1215
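The word-aligned path above is all about scheduling on XScale: a pld runs roughly 32 bytes ahead of the loads, and stores go out as 8-byte strd pairs from a quad-aligned destination. Stripped of that scheduling, the 128-byte loop reduces to this C shape (a sketch using GCC's __builtin_prefetch; 8-byte alignment of both pointers is assumed, as the assembly arranges):

    #include <stddef.h>
    #include <stdint.h>

    /* Rough C shape of the 128-byte loop; both pointers 8-byte aligned. */
    static void
    copy_bulk_sketch(uint64_t *dst, const uint64_t *src, size_t len)
    {
    	while (len >= 128) {
    		__builtin_prefetch(src + 4);	/* ~32 bytes ahead, like pld */
    		for (int i = 0; i < 16; i++)
    			*dst++ = *src++;	/* 16 x 8 bytes, like strd */
    		len -= 128;
    	}
    }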
1216
1217 /*
1218 * At this point, it has not been possible to word align both buffers.
1219 * The destination buffer is word aligned, but the source buffer is not.
1220 */
1221 .Lmemcpy_bad_align:
1222 stmfd sp!, {r4-r7}
1223 bic r1, r1, #0x03
1224 cmp ip, #2
1225 ldr ip, [r1], #0x04
1226 bgt .Lmemcpy_bad3
1227 beq .Lmemcpy_bad2
1228 b .Lmemcpy_bad1
1229
1230 .Lmemcpy_bad1_loop16:
1231 #ifdef __ARMEB__
1232 mov r4, ip, lsl #8
1233 #else
1234 mov r4, ip, lsr #8
1235 #endif
1236 ldr r5, [r1], #0x04
1237 pld [r1, #0x018]
1238 ldr r6, [r1], #0x04
1239 ldr r7, [r1], #0x04
1240 ldr ip, [r1], #0x04
1241 #ifdef __ARMEB__
1242 orr r4, r4, r5, lsr #24
1243 mov r5, r5, lsl #8
1244 orr r5, r5, r6, lsr #24
1245 mov r6, r6, lsl #8
1246 orr r6, r6, r7, lsr #24
1247 mov r7, r7, lsl #8
1248 orr r7, r7, ip, lsr #24
1249 #else
1250 orr r4, r4, r5, lsl #24
1251 mov r5, r5, lsr #8
1252 orr r5, r5, r6, lsl #24
1253 mov r6, r6, lsr #8
1254 orr r6, r6, r7, lsl #24
1255 mov r7, r7, lsr #8
1256 orr r7, r7, ip, lsl #24
1257 #endif
1258 str r4, [r3], #0x04
1259 str r5, [r3], #0x04
1260 str r6, [r3], #0x04
1261 str r7, [r3], #0x04
1262 .Lmemcpy_bad1:
1263 subs r2, r2, #0x10
1264 bge .Lmemcpy_bad1_loop16
1265
1266 adds r2, r2, #0x10
1267 ldmeqfd sp!, {r4-r7}
1268 RETeq /* Return now if done */
1269 subs r2, r2, #0x04
1270 sublt r1, r1, #0x03
1271 blt .Lmemcpy_bad_done
1272
1273 .Lmemcpy_bad1_loop4:
1274 #ifdef __ARMEB__
1275 mov r4, ip, lsl #8
1276 #else
1277 mov r4, ip, lsr #8
1278 #endif
1279 ldr ip, [r1], #0x04
1280 subs r2, r2, #0x04
1281 #ifdef __ARMEB__
1282 orr r4, r4, ip, lsr #24
1283 #else
1284 orr r4, r4, ip, lsl #24
1285 #endif
1286 str r4, [r3], #0x04
1287 bge .Lmemcpy_bad1_loop4
1288 sub r1, r1, #0x03
1289 b .Lmemcpy_bad_done
1290
1291 .Lmemcpy_bad2_loop16:
1292 #ifdef __ARMEB__
1293 mov r4, ip, lsl #16
1294 #else
1295 mov r4, ip, lsr #16
1296 #endif
1297 ldr r5, [r1], #0x04
1298 pld [r1, #0x018]
1299 ldr r6, [r1], #0x04
1300 ldr r7, [r1], #0x04
1301 ldr ip, [r1], #0x04
1302 #ifdef __ARMEB__
1303 orr r4, r4, r5, lsr #16
1304 mov r5, r5, lsl #16
1305 orr r5, r5, r6, lsr #16
1306 mov r6, r6, lsl #16
1307 orr r6, r6, r7, lsr #16
1308 mov r7, r7, lsl #16
1309 orr r7, r7, ip, lsr #16
1310 #else
1311 orr r4, r4, r5, lsl #16
1312 mov r5, r5, lsr #16
1313 orr r5, r5, r6, lsl #16
1314 mov r6, r6, lsr #16
1315 orr r6, r6, r7, lsl #16
1316 mov r7, r7, lsr #16
1317 orr r7, r7, ip, lsl #16
1318 #endif
1319 str r4, [r3], #0x04
1320 str r5, [r3], #0x04
1321 str r6, [r3], #0x04
1322 str r7, [r3], #0x04
1323 .Lmemcpy_bad2:
1324 subs r2, r2, #0x10
1325 bge .Lmemcpy_bad2_loop16
1326
1327 adds r2, r2, #0x10
1328 ldmeqfd sp!, {r4-r7}
1329 RETeq /* Return now if done */
1330 subs r2, r2, #0x04
1331 sublt r1, r1, #0x02
1332 blt .Lmemcpy_bad_done
1333
1334 .Lmemcpy_bad2_loop4:
1335 #ifdef __ARMEB__
1336 mov r4, ip, lsl #16
1337 #else
1338 mov r4, ip, lsr #16
1339 #endif
1340 ldr ip, [r1], #0x04
1341 subs r2, r2, #0x04
1342 #ifdef __ARMEB__
1343 orr r4, r4, ip, lsr #16
1344 #else
1345 orr r4, r4, ip, lsl #16
1346 #endif
1347 str r4, [r3], #0x04
1348 bge .Lmemcpy_bad2_loop4
1349 sub r1, r1, #0x02
1350 b .Lmemcpy_bad_done
1351
1352 .Lmemcpy_bad3_loop16:
1353 #ifdef __ARMEB__
1354 mov r4, ip, lsl #24
1355 #else
1356 mov r4, ip, lsr #24
1357 #endif
1358 ldr r5, [r1], #0x04
1359 pld [r1, #0x018]
1360 ldr r6, [r1], #0x04
1361 ldr r7, [r1], #0x04
1362 ldr ip, [r1], #0x04
1363 #ifdef __ARMEB__
1364 orr r4, r4, r5, lsr #8
1365 mov r5, r5, lsl #24
1366 orr r5, r5, r6, lsr #8
1367 mov r6, r6, lsl #24
1368 orr r6, r6, r7, lsr #8
1369 mov r7, r7, lsl #24
1370 orr r7, r7, ip, lsr #8
1371 #else
1372 orr r4, r4, r5, lsl #8
1373 mov r5, r5, lsr #24
1374 orr r5, r5, r6, lsl #8
1375 mov r6, r6, lsr #24
1376 orr r6, r6, r7, lsl #8
1377 mov r7, r7, lsr #24
1378 orr r7, r7, ip, lsl #8
1379 #endif
1380 str r4, [r3], #0x04
1381 str r5, [r3], #0x04
1382 str r6, [r3], #0x04
1383 str r7, [r3], #0x04
1384 .Lmemcpy_bad3:
1385 subs r2, r2, #0x10
1386 bge .Lmemcpy_bad3_loop16
1387
1388 adds r2, r2, #0x10
1389 ldmeqfd sp!, {r4-r7}
1390 RETeq /* Return now if done */
1391 subs r2, r2, #0x04
1392 sublt r1, r1, #0x01
1393 blt .Lmemcpy_bad_done
1394
1395 .Lmemcpy_bad3_loop4:
1396 #ifdef __ARMEB__
1397 mov r4, ip, lsl #24
1398 #else
1399 mov r4, ip, lsr #24
1400 #endif
1401 ldr ip, [r1], #0x04
1402 subs r2, r2, #0x04
1403 #ifdef __ARMEB__
1404 orr r4, r4, ip, lsr #8
1405 #else
1406 orr r4, r4, ip, lsl #8
1407 #endif
1408 str r4, [r3], #0x04
1409 bge .Lmemcpy_bad3_loop4
1410 sub r1, r1, #0x01
1411
1412 .Lmemcpy_bad_done:
1413 ldmfd sp!, {r4-r7}
1414 adds r2, r2, #0x04
1415 RETeq
1416 ldrb ip, [r1], #0x01
1417 cmp r2, #0x02
1418 ldrgeb r2, [r1], #0x01
1419 strb ip, [r3], #0x01
1420 ldrgtb ip, [r1]
1421 strgeb r2, [r3], #0x01
1422 strgtb ip, [r3]
1423 RET
1424
1425
1426 /*
1427 * Handle short copies (less than 16 bytes), possibly misaligned.
1428 * Some of these are *very* common, thanks to the network stack,
1429 * and so are handled specially.
1430 */
1431 .Lmemcpy_short:
1432 add pc, pc, r2, lsl #2
1433 nop
1434 RET /* 0x00 */
1435 b .Lmemcpy_bytewise /* 0x01 */
1436 b .Lmemcpy_bytewise /* 0x02 */
1437 b .Lmemcpy_bytewise /* 0x03 */
1438 b .Lmemcpy_4 /* 0x04 */
1439 b .Lmemcpy_bytewise /* 0x05 */
1440 b .Lmemcpy_6 /* 0x06 */
1441 b .Lmemcpy_bytewise /* 0x07 */
1442 b .Lmemcpy_8 /* 0x08 */
1443 b .Lmemcpy_bytewise /* 0x09 */
1444 b .Lmemcpy_bytewise /* 0x0a */
1445 b .Lmemcpy_bytewise /* 0x0b */
1446 b .Lmemcpy_c /* 0x0c */
1447 .Lmemcpy_bytewise:
1448 mov r3, r0 /* We must not clobber r0 */
1449 ldrb ip, [r1], #0x01
1450 1: subs r2, r2, #0x01
1451 strb ip, [r3], #0x01
1452 ldrneb ip, [r1], #0x01
1453 bne 1b
1454 RET
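The add pc, pc, r2, lsl #2 above is a branch table indexed by length: each slot is a single 4-byte instruction, so lengths 0 through 12 each get a dedicated target, with the common 4-, 6-, 8- and 12-byte sizes going to specialized routines. The closest C idiom is a dense fall-through switch (a sketch, not the kernel code; the assembly's special cases additionally dispatch on alignment):

    #include <stddef.h>

    /* Sketch of a length-dispatched short copy, 0..12 bytes. */
    static void
    copy_short_sketch(unsigned char *d, const unsigned char *s, size_t len)
    {
    	switch (len) {			/* one jump-table slot per length */
    	case 12: *d++ = *s++;		/* FALLTHROUGH */
    	case 11: *d++ = *s++;		/* FALLTHROUGH */
    	case 10: *d++ = *s++;		/* FALLTHROUGH */
    	case 9:  *d++ = *s++;		/* FALLTHROUGH */
    	case 8:  *d++ = *s++;		/* FALLTHROUGH */
    	case 7:  *d++ = *s++;		/* FALLTHROUGH */
    	case 6:  *d++ = *s++;		/* FALLTHROUGH */
    	case 5:  *d++ = *s++;		/* FALLTHROUGH */
    	case 4:  *d++ = *s++;		/* FALLTHROUGH */
    	case 3:  *d++ = *s++;		/* FALLTHROUGH */
    	case 2:  *d++ = *s++;		/* FALLTHROUGH */
    	case 1:  *d++ = *s++;		/* FALLTHROUGH */
    	case 0:  break;
    	}
    }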
1455
1456 /******************************************************************************
1457 * Special case for 4 byte copies
1458 */
1459 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1460 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1461 LMEMCPY_4_PAD
1462 .Lmemcpy_4:
1463 and r2, r1, #0x03
1464 orr r2, r2, r0, lsl #2
1465 ands r2, r2, #0x0f
1466 sub r3, pc, #0x14
1467 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1468
1469 /*
1470 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1471 */
1472 ldr r2, [r1]
1473 str r2, [r0]
1474 RET
1475 LMEMCPY_4_PAD
1476
1477 /*
1478 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1479 */
1480 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1481 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1482 #ifdef __ARMEB__
1483 mov r3, r3, lsl #8 /* r3 = 012. */
1484 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1485 #else
1486 mov r3, r3, lsr #8 /* r3 = .210 */
1487 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1488 #endif
1489 str r3, [r0]
1490 RET
1491 LMEMCPY_4_PAD
1492
1493 /*
1494 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1495 */
1496 #ifdef __ARMEB__
1497 ldrh r3, [r1]
1498 ldrh r2, [r1, #0x02]
1499 #else
1500 ldrh r3, [r1, #0x02]
1501 ldrh r2, [r1]
1502 #endif
1503 orr r3, r2, r3, lsl #16
1504 str r3, [r0]
1505 RET
1506 LMEMCPY_4_PAD
1507
1508 /*
1509 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1510 */
1511 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1512 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1513 #ifdef __ARMEB__
1514 mov r3, r3, lsl #24 /* r3 = 0... */
1515 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1516 #else
1517 mov r3, r3, lsr #24 /* r3 = ...0 */
1518 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1519 #endif
1520 str r3, [r0]
1521 RET
1522 LMEMCPY_4_PAD
1523
1524 /*
1525 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1526 */
1527 ldr r2, [r1]
1528 #ifdef __ARMEB__
1529 strb r2, [r0, #0x03]
1530 mov r3, r2, lsr #8
1531 mov r1, r2, lsr #24
1532 strb r1, [r0]
1533 #else
1534 strb r2, [r0]
1535 mov r3, r2, lsr #8
1536 mov r1, r2, lsr #24
1537 strb r1, [r0, #0x03]
1538 #endif
1539 strh r3, [r0, #0x01]
1540 RET
1541 LMEMCPY_4_PAD
1542
1543 /*
1544 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1545 */
1546 ldrb r2, [r1]
1547 ldrh r3, [r1, #0x01]
1548 ldrb r1, [r1, #0x03]
1549 strb r2, [r0]
1550 strh r3, [r0, #0x01]
1551 strb r1, [r0, #0x03]
1552 RET
1553 LMEMCPY_4_PAD
1554
1555 /*
1556 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1557 */
1558 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1559 	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1560 #ifdef __ARMEB__
1561 mov r1, r2, lsr #8 /* r1 = ...0 */
1562 strb r1, [r0]
1563 mov r2, r2, lsl #8 /* r2 = .01. */
1564 orr r2, r2, r3, lsr #8 /* r2 = .012 */
1565 #else
1566 strb r2, [r0]
1567 mov r2, r2, lsr #8 /* r2 = ...1 */
1568 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1569 mov r3, r3, lsr #8 /* r3 = ...3 */
1570 #endif
1571 strh r2, [r0, #0x01]
1572 strb r3, [r0, #0x03]
1573 RET
1574 LMEMCPY_4_PAD
1575
1576 /*
1577 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1578 */
1579 ldrb r2, [r1]
1580 ldrh r3, [r1, #0x01]
1581 ldrb r1, [r1, #0x03]
1582 strb r2, [r0]
1583 strh r3, [r0, #0x01]
1584 strb r1, [r0, #0x03]
1585 RET
1586 LMEMCPY_4_PAD
1587
1588 /*
1589 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1590 */
1591 ldr r2, [r1]
1592 #ifdef __ARMEB__
1593 strh r2, [r0, #0x02]
1594 mov r3, r2, lsr #16
1595 strh r3, [r0]
1596 #else
1597 strh r2, [r0]
1598 mov r3, r2, lsr #16
1599 strh r3, [r0, #0x02]
1600 #endif
1601 RET
1602 LMEMCPY_4_PAD
1603
1604 /*
1605 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1606 */
1607 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1608 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1609 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1610 strh r1, [r0]
1611 #ifdef __ARMEB__
1612 mov r2, r2, lsl #8 /* r2 = 012. */
1613 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1614 #else
1615 mov r2, r2, lsr #24 /* r2 = ...2 */
1616 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1617 #endif
1618 strh r2, [r0, #0x02]
1619 RET
1620 LMEMCPY_4_PAD
1621
1622 /*
1623 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1624 */
1625 ldrh r2, [r1]
1626 ldrh r3, [r1, #0x02]
1627 strh r2, [r0]
1628 strh r3, [r0, #0x02]
1629 RET
1630 LMEMCPY_4_PAD
1631
1632 /*
1633 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1634 */
1635 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1636 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1637 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1638 strh r1, [r0, #0x02]
1639 #ifdef __ARMEB__
1640 mov r3, r3, lsr #24 /* r3 = ...1 */
1641 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1642 #else
1643 mov r3, r3, lsl #8 /* r3 = 321. */
1644 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1645 #endif
1646 strh r3, [r0]
1647 RET
1648 LMEMCPY_4_PAD
1649
1650 /*
1651 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1652 */
1653 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1654 #ifdef __ARMEB__
1655 strb r2, [r0, #0x03]
1656 mov r3, r2, lsr #8
1657 mov r1, r2, lsr #24
1658 strh r3, [r0, #0x01]
1659 strb r1, [r0]
1660 #else
1661 strb r2, [r0]
1662 mov r3, r2, lsr #8
1663 mov r1, r2, lsr #24
1664 strh r3, [r0, #0x01]
1665 strb r1, [r0, #0x03]
1666 #endif
1667 RET
1668 LMEMCPY_4_PAD
1669
1670 /*
1671 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1672 */
1673 ldrb r2, [r1]
1674 ldrh r3, [r1, #0x01]
1675 ldrb r1, [r1, #0x03]
1676 strb r2, [r0]
1677 strh r3, [r0, #0x01]
1678 strb r1, [r0, #0x03]
1679 RET
1680 LMEMCPY_4_PAD
1681
1682 /*
1683 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1684 */
1685 #ifdef __ARMEB__
1686 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1687 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1688 strb r3, [r0, #0x03]
1689 mov r3, r3, lsr #8 /* r3 = ...2 */
1690 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1691 strh r3, [r0, #0x01]
1692 mov r2, r2, lsr #8 /* r2 = ...0 */
1693 strb r2, [r0]
1694 #else
1695 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1696 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1697 strb r2, [r0]
1698 mov r2, r2, lsr #8 /* r2 = ...1 */
1699 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1700 strh r2, [r0, #0x01]
1701 mov r3, r3, lsr #8 /* r3 = ...3 */
1702 strb r3, [r0, #0x03]
1703 #endif
1704 RET
1705 LMEMCPY_4_PAD
1706
1707 /*
1708 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1709 */
1710 ldrb r2, [r1]
1711 ldrh r3, [r1, #0x01]
1712 ldrb r1, [r1, #0x03]
1713 strb r2, [r0]
1714 strh r3, [r0, #0x01]
1715 strb r1, [r0, #0x03]
1716 RET
1717 LMEMCPY_4_PAD
1718
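Every LMEMCPY_4 case above is reached through the same computed branch: the two low bits of the destination and source addresses are packed into a four-bit index, which selects one of sixteen 64-byte-aligned stanzas. The 6- and 8-byte sections below reuse this dispatch. The index computation, in C (a sketch; the function name is invented):

    #include <stdint.h>

    /* Sketch of the 16-way case index used by the fixed-size copies. */
    static unsigned
    lmemcpy_index(const void *dst, const void *src)
    {
    	unsigned idx;

    	idx  = (unsigned)((uintptr_t)src & 0x03);	/* src low bits */
    	idx |= (unsigned)((uintptr_t)dst & 0x03) << 2;	/* dst low bits */
    	return (idx);	/* e.g. 0110: dst 8-bit, src 16-bit aligned */
    }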
1719
1720 /******************************************************************************
1721 * Special case for 6 byte copies
1722 */
1723 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1724 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1725 LMEMCPY_6_PAD
1726 .Lmemcpy_6:
1727 and r2, r1, #0x03
1728 orr r2, r2, r0, lsl #2
1729 ands r2, r2, #0x0f
1730 sub r3, pc, #0x14
1731 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1732
1733 /*
1734 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1735 */
1736 ldr r2, [r1]
1737 ldrh r3, [r1, #0x04]
1738 str r2, [r0]
1739 strh r3, [r0, #0x04]
1740 RET
1741 LMEMCPY_6_PAD
1742
1743 /*
1744 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1745 */
1746 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1747 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1748 #ifdef __ARMEB__
1749 mov r2, r2, lsl #8 /* r2 = 012. */
1750 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1751 #else
1752 mov r2, r2, lsr #8 /* r2 = .210 */
1753 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1754 #endif
1755 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1756 str r2, [r0]
1757 strh r3, [r0, #0x04]
1758 RET
1759 LMEMCPY_6_PAD
1760
1761 /*
1762 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1763 */
1764 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1765 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1766 #ifdef __ARMEB__
1767 mov r1, r3, lsr #16 /* r1 = ..23 */
1768 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1769 str r1, [r0]
1770 strh r3, [r0, #0x04]
1771 #else
1772 mov r1, r3, lsr #16 /* r1 = ..54 */
1773 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1774 str r2, [r0]
1775 strh r1, [r0, #0x04]
1776 #endif
1777 RET
1778 LMEMCPY_6_PAD
1779
1780 /*
1781 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1782 */
1783 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1784 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1785 	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
1786 #ifdef __ARMEB__
1787 mov r2, r2, lsl #24 /* r2 = 0... */
1788 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1789 mov r3, r3, lsl #8 /* r3 = 234. */
1790 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
1791 #else
1792 mov r2, r2, lsr #24 /* r2 = ...0 */
1793 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1794 mov r1, r1, lsl #8 /* r1 = xx5. */
1795 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1796 #endif
1797 str r2, [r0]
1798 strh r1, [r0, #0x04]
1799 RET
1800 LMEMCPY_6_PAD
1801
1802 /*
1803 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1804 */
1805 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1806 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1807 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1808 strh r1, [r0, #0x01]
1809 #ifdef __ARMEB__
1810 mov r1, r3, lsr #24 /* r1 = ...0 */
1811 strb r1, [r0]
1812 mov r3, r3, lsl #8 /* r3 = 123. */
1813 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1814 #else
1815 strb r3, [r0]
1816 mov r3, r3, lsr #24 /* r3 = ...3 */
1817 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1818 mov r2, r2, lsr #8 /* r2 = ...5 */
1819 #endif
1820 strh r3, [r0, #0x03]
1821 strb r2, [r0, #0x05]
1822 RET
1823 LMEMCPY_6_PAD
1824
1825 /*
1826 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1827 */
1828 ldrb r2, [r1]
1829 ldrh r3, [r1, #0x01]
1830 ldrh ip, [r1, #0x03]
1831 ldrb r1, [r1, #0x05]
1832 strb r2, [r0]
1833 strh r3, [r0, #0x01]
1834 strh ip, [r0, #0x03]
1835 strb r1, [r0, #0x05]
1836 RET
1837 LMEMCPY_6_PAD
1838
1839 /*
1840 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1841 */
1842 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1843 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1844 #ifdef __ARMEB__
1845 mov r3, r2, lsr #8 /* r3 = ...0 */
1846 strb r3, [r0]
1847 strb r1, [r0, #0x05]
1848 mov r3, r1, lsr #8 /* r3 = .234 */
1849 strh r3, [r0, #0x03]
1850 mov r3, r2, lsl #8 /* r3 = .01. */
1851 orr r3, r3, r1, lsr #24 /* r3 = .012 */
1852 strh r3, [r0, #0x01]
1853 #else
1854 strb r2, [r0]
1855 mov r3, r1, lsr #24
1856 strb r3, [r0, #0x05]
1857 mov r3, r1, lsr #8 /* r3 = .543 */
1858 strh r3, [r0, #0x03]
1859 mov r3, r2, lsr #8 /* r3 = ...1 */
1860 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
1861 strh r3, [r0, #0x01]
1862 #endif
1863 RET
1864 LMEMCPY_6_PAD
1865
1866 /*
1867 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1868 */
1869 ldrb r2, [r1]
1870 ldrh r3, [r1, #0x01]
1871 ldrh ip, [r1, #0x03]
1872 ldrb r1, [r1, #0x05]
1873 strb r2, [r0]
1874 strh r3, [r0, #0x01]
1875 strh ip, [r0, #0x03]
1876 strb r1, [r0, #0x05]
1877 RET
1878 LMEMCPY_6_PAD
1879
1880 /*
1881 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1882 */
1883 #ifdef __ARMEB__
1884 ldr r2, [r1] /* r2 = 0123 */
1885 ldrh r3, [r1, #0x04] /* r3 = ..45 */
1886 mov r1, r2, lsr #16 /* r1 = ..01 */
1887 	orr	r3, r3, r2, lsl #16	/* r3 = 2345 */
1888 strh r1, [r0]
1889 str r3, [r0, #0x02]
1890 #else
1891 ldrh r2, [r1, #0x04] /* r2 = ..54 */
1892 ldr r3, [r1] /* r3 = 3210 */
1893 mov r2, r2, lsl #16 /* r2 = 54.. */
1894 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
1895 strh r3, [r0]
1896 str r2, [r0, #0x02]
1897 #endif
1898 RET
1899 LMEMCPY_6_PAD
1900
1901 /*
1902 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1903 */
1904 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1905 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
1906 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1907 #ifdef __ARMEB__
1908 mov r2, r2, lsr #8 /* r2 = .345 */
1909 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
1910 #else
1911 mov r2, r2, lsl #8 /* r2 = 543. */
1912 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
1913 #endif
1914 strh r1, [r0]
1915 str r2, [r0, #0x02]
1916 RET
1917 LMEMCPY_6_PAD
1918
1919 /*
1920 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1921 */
1922 ldrh r2, [r1]
1923 ldr r3, [r1, #0x02]
1924 strh r2, [r0]
1925 str r3, [r0, #0x02]
1926 RET
1927 LMEMCPY_6_PAD
1928
1929 /*
1930 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1931 */
1932 ldrb r3, [r1] /* r3 = ...0 */
1933 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1934 ldrb r1, [r1, #0x05] /* r1 = ...5 */
1935 #ifdef __ARMEB__
1936 mov r3, r3, lsl #8 /* r3 = ..0. */
1937 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
1938 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
1939 #else
1940 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1941 mov r1, r1, lsl #24 /* r1 = 5... */
1942 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
1943 #endif
1944 strh r3, [r0]
1945 str r1, [r0, #0x02]
1946 RET
1947 LMEMCPY_6_PAD
1948
1949 /*
1950 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1951 */
1952 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1953 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
1954 #ifdef __ARMEB__
1955 mov r3, r2, lsr #24 /* r3 = ...0 */
1956 strb r3, [r0]
1957 mov r2, r2, lsl #8 /* r2 = 123. */
1958 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
1959 #else
1960 strb r2, [r0]
1961 mov r2, r2, lsr #8 /* r2 = .321 */
1962 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
1963 mov r1, r1, lsr #8 /* r1 = ...5 */
1964 #endif
1965 str r2, [r0, #0x01]
1966 strb r1, [r0, #0x05]
1967 RET
1968 LMEMCPY_6_PAD
1969
1970 /*
1971 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1972 */
1973 ldrb r2, [r1]
1974 ldrh r3, [r1, #0x01]
1975 ldrh ip, [r1, #0x03]
1976 ldrb r1, [r1, #0x05]
1977 strb r2, [r0]
1978 strh r3, [r0, #0x01]
1979 strh ip, [r0, #0x03]
1980 strb r1, [r0, #0x05]
1981 RET
1982 LMEMCPY_6_PAD
1983
1984 /*
1985 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1986 */
1987 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1988 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1989 #ifdef __ARMEB__
1990 mov r3, r2, lsr #8 /* r3 = ...0 */
1991 strb r3, [r0]
1992 mov r2, r2, lsl #24 /* r2 = 1... */
1993 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
1994 #else
1995 strb r2, [r0]
1996 mov r2, r2, lsr #8 /* r2 = ...1 */
1997 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
1998 mov r1, r1, lsr #24 /* r1 = ...5 */
1999 #endif
2000 str r2, [r0, #0x01]
2001 strb r1, [r0, #0x05]
2002 RET
2003 LMEMCPY_6_PAD
2004
2005 /*
2006 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2007 */
2008 ldrb r2, [r1]
2009 ldr r3, [r1, #0x01]
2010 ldrb r1, [r1, #0x05]
2011 strb r2, [r0]
2012 str r3, [r0, #0x01]
2013 strb r1, [r0, #0x05]
2014 RET
2015 LMEMCPY_6_PAD
2016
2017
2018 /******************************************************************************
2019 * Special case for 8 byte copies
2020 */
2021 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2022 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2023 LMEMCPY_8_PAD
2024 .Lmemcpy_8:
2025 and r2, r1, #0x03
2026 orr r2, r2, r0, lsl #2
2027 ands r2, r2, #0x0f
2028 sub r3, pc, #0x14
2029 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2030
2031 /*
2032 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2033 */
2034 ldr r2, [r1]
2035 ldr r3, [r1, #0x04]
2036 str r2, [r0]
2037 str r3, [r0, #0x04]
2038 RET
2039 LMEMCPY_8_PAD
2040
2041 /*
2042 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2043 */
2044 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2045 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2046 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2047 #ifdef __ARMEB__
2048 mov r3, r3, lsl #8 /* r3 = 012. */
2049 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2050 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2051 #else
2052 mov r3, r3, lsr #8 /* r3 = .210 */
2053 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2054 mov r1, r1, lsl #24 /* r1 = 7... */
2055 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2056 #endif
2057 str r3, [r0]
2058 str r2, [r0, #0x04]
2059 RET
2060 LMEMCPY_8_PAD
2061
2062 /*
2063 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2064 */
2065 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2066 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2067 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2068 #ifdef __ARMEB__
2069 mov r2, r2, lsl #16 /* r2 = 01.. */
2070 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2071 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2072 #else
2073 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2074 mov r3, r3, lsr #16 /* r3 = ..54 */
2075 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2076 #endif
2077 str r2, [r0]
2078 str r3, [r0, #0x04]
2079 RET
2080 LMEMCPY_8_PAD
2081
2082 /*
2083 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2084 */
2085 ldrb r3, [r1] /* r3 = ...0 */
2086 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2087 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2088 #ifdef __ARMEB__
2089 mov r3, r3, lsl #24 /* r3 = 0... */
2090 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2091 mov r2, r2, lsl #24 /* r2 = 4... */
2092 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2093 #else
2094 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2095 mov r2, r2, lsr #24 /* r2 = ...4 */
2096 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2097 #endif
2098 str r3, [r0]
2099 str r2, [r0, #0x04]
2100 RET
2101 LMEMCPY_8_PAD
2102
2103 /*
2104 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2105 */
2106 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2107 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2108 #ifdef __ARMEB__
2109 mov r1, r3, lsr #24 /* r1 = ...0 */
2110 strb r1, [r0]
2111 mov r1, r3, lsr #8 /* r1 = .012 */
2112 strb r2, [r0, #0x07]
2113 mov r3, r3, lsl #24 /* r3 = 3... */
2114 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2115 #else
2116 strb r3, [r0]
2117 mov r1, r2, lsr #24 /* r1 = ...7 */
2118 strb r1, [r0, #0x07]
2119 mov r1, r3, lsr #8 /* r1 = .321 */
2120 mov r3, r3, lsr #24 /* r3 = ...3 */
2121 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2122 #endif
2123 strh r1, [r0, #0x01]
2124 str r3, [r0, #0x03]
2125 RET
2126 LMEMCPY_8_PAD
2127
2128 /*
2129 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2130 */
2131 ldrb r2, [r1]
2132 ldrh r3, [r1, #0x01]
2133 ldr ip, [r1, #0x03]
2134 ldrb r1, [r1, #0x07]
2135 strb r2, [r0]
2136 strh r3, [r0, #0x01]
2137 str ip, [r0, #0x03]
2138 strb r1, [r0, #0x07]
2139 RET
2140 LMEMCPY_8_PAD
2141
2142 /*
2143 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2144 */
2145 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2146 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2147 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2148 #ifdef __ARMEB__
2149 mov ip, r2, lsr #8 /* ip = ...0 */
2150 strb ip, [r0]
2151 mov ip, r2, lsl #8 /* ip = .01. */
2152 orr ip, ip, r3, lsr #24 /* ip = .012 */
2153 strb r1, [r0, #0x07]
2154 mov r3, r3, lsl #8 /* r3 = 345. */
2155 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2156 #else
2157 strb r2, [r0] /* 0 */
2158 mov ip, r1, lsr #8 /* ip = ...7 */
2159 strb ip, [r0, #0x07] /* 7 */
2160 mov ip, r2, lsr #8 /* ip = ...1 */
2161 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2162 mov r3, r3, lsr #8 /* r3 = .543 */
2163 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2164 #endif
2165 strh ip, [r0, #0x01]
2166 str r3, [r0, #0x03]
2167 RET
2168 LMEMCPY_8_PAD
2169
2170 /*
2171 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2172 */
2173 ldrb r3, [r1] /* r3 = ...0 */
2174 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2175 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2176 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2177 strb r3, [r0]
2178 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2179 #ifdef __ARMEB__
2180 strh r3, [r0, #0x01]
2181 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2182 #else
2183 strh ip, [r0, #0x01]
2184 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2185 #endif
2186 str r2, [r0, #0x03]
2187 strb r1, [r0, #0x07]
2188 RET
2189 LMEMCPY_8_PAD
2190
2191 /*
2192 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2193 */
2194 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2195 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2196 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2197 #ifdef __ARMEB__
2198 strh r1, [r0]
2199 mov r1, r3, lsr #16 /* r1 = ..45 */
2200 	orr	r2, r1, r2, lsl #16	/* r2 = 2345 */
2201 #else
2202 strh r2, [r0]
2203 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
2204 mov r3, r3, lsr #16 /* r3 = ..76 */
2205 #endif
2206 str r2, [r0, #0x02]
2207 strh r3, [r0, #0x06]
2208 RET
2209 LMEMCPY_8_PAD
2210
2211 /*
2212  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2213 */
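/*
 * Note: in this case r1 points one byte past a word boundary, so the
 * word load at r1 - 1 is aligned; it reads one byte before the start
 * of the source, but stays within the same aligned word (and thus the
 * same page) as the first source byte, so it cannot fault.
 */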
2214 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2215 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2216 ldrb ip, [r1, #0x07] /* ip = ...7 */
2217 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2218 strh r1, [r0]
2219 #ifdef __ARMEB__
2220 mov r1, r2, lsl #24 /* r1 = 2... */
2221 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
2222 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
2223 #else
2224 mov r1, r2, lsr #24 /* r1 = ...2 */
2225 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
2226 mov r3, r3, lsr #24 /* r3 = ...6 */
2227 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
2228 #endif
2229 str r1, [r0, #0x02]
2230 strh r3, [r0, #0x06]
2231 RET
2232 LMEMCPY_8_PAD
2233
2234 /*
2235 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2236 */
2237 ldrh r2, [r1]
2238 ldr ip, [r1, #0x02]
2239 ldrh r3, [r1, #0x06]
2240 strh r2, [r0]
2241 str ip, [r0, #0x02]
2242 strh r3, [r0, #0x06]
2243 RET
2244 LMEMCPY_8_PAD
2245
2246 /*
2247  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2248 */
2249 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
2250 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2251 ldrb ip, [r1] /* ip = ...0 */
2252 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
2253 strh r1, [r0, #0x06]
2254 #ifdef __ARMEB__
2255 mov r3, r3, lsr #24 /* r3 = ...5 */
2256 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
2257 mov r2, r2, lsr #24 /* r2 = ...1 */
2258 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
2259 #else
2260 mov r3, r3, lsl #24 /* r3 = 5... */
2261 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
2262 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
2263 #endif
2264 str r3, [r0, #0x02]
2265 strh r2, [r0]
2266 RET
2267 LMEMCPY_8_PAD
2268
2269 /*
2270  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2271 */
2272 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2273 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2274 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
2275 strh r1, [r0, #0x05]
2276 #ifdef __ARMEB__
2277 strb r3, [r0, #0x07]
2278 mov r1, r2, lsr #24 /* r1 = ...0 */
2279 strb r1, [r0]
2280 mov r2, r2, lsl #8 /* r2 = 123. */
2281 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
2282 str r2, [r0, #0x01]
2283 #else
2284 strb r2, [r0]
2285 mov r1, r3, lsr #24 /* r1 = ...7 */
2286 strb r1, [r0, #0x07]
2287 mov r2, r2, lsr #8 /* r2 = .321 */
2288 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
2289 str r2, [r0, #0x01]
2290 #endif
2291 RET
2292 LMEMCPY_8_PAD
2293
2294 /*
2295  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2296 */
2297 ldrb r3, [r1] /* r3 = ...0 */
2298 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
2299 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2300 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2301 strb r3, [r0]
2302 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
2303 #ifdef __ARMEB__
2304 strh ip, [r0, #0x05]
2305 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
2306 #else
2307 strh r3, [r0, #0x05]
2308 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
2309 #endif
2310 str r2, [r0, #0x01]
2311 strb r1, [r0, #0x07]
2312 RET
2313 LMEMCPY_8_PAD
2314
2315 /*
2316  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2317 */
2318 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2319 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2320 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2321 #ifdef __ARMEB__
2322 mov ip, r2, lsr #8 /* ip = ...0 */
2323 strb ip, [r0]
2324 mov ip, r2, lsl #24 /* ip = 1... */
2325 orr ip, ip, r3, lsr #8 /* ip = 1234 */
2326 strb r1, [r0, #0x07]
2327 mov r1, r1, lsr #8 /* r1 = ...6 */
2328 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
2329 #else
2330 strb r2, [r0]
2331 mov ip, r2, lsr #8 /* ip = ...1 */
2332 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2333 mov r2, r1, lsr #8 /* r2 = ...7 */
2334 strb r2, [r0, #0x07]
2335 mov r1, r1, lsl #8 /* r1 = .76. */
2336 orr r1, r1, r3, lsr #24 /* r1 = .765 */
2337 #endif
2338 str ip, [r0, #0x01]
2339 strh r1, [r0, #0x05]
2340 RET
2341 LMEMCPY_8_PAD
2342
2343 /*
2344  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2345 */
2346 ldrb r2, [r1]
2347 ldr ip, [r1, #0x01]
2348 ldrh r3, [r1, #0x05]
2349 ldrb r1, [r1, #0x07]
2350 strb r2, [r0]
2351 str ip, [r0, #0x01]
2352 strh r3, [r0, #0x05]
2353 strb r1, [r0, #0x07]
2354 RET
2355 LMEMCPY_8_PAD
2356
2357 /******************************************************************************
2358 * Special case for 12 byte copies
2359 */
2360 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
2361 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
2362 LMEMCPY_C_PAD
2363 .Lmemcpy_c:
2364 and r2, r1, #0x03
2365 orr r2, r2, r0, lsl #2
2366 ands r2, r2, #0x0f
2367 sub r3, pc, #0x14
2368 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
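/*
 * The four instructions above build a 4-bit case number from the low
 * two bits of dst and src ((dst & 3) << 2 | (src & 3)) and branch into
 * a table of handlers.  Each handler is padded out to
 * 1 << LMEMCPY_C_LOG2 (128) bytes, and r3 is wound back to the start
 * of this dispatch block (the pc reads 8 bytes ahead on ARM, hence the
 * 0x14), so the handler for a non-zero case n lives at r3 + n * 128,
 * while case 0 simply falls through.  Roughly, as an illustrative C
 * sketch (not part of the build; goto_handler is a made-up helper):
 *
 *	unsigned idx = (((uintptr_t)dst & 3) << 2) |
 *	    ((uintptr_t)src & 3);
 *	if (idx != 0)
 *		goto_handler(base + idx * 128);
 *	// idx == 0: fall through to the fully aligned copy below
 */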
2369
2370 /*
2371 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2372 */
2373 ldr r2, [r1]
2374 ldr r3, [r1, #0x04]
2375 ldr r1, [r1, #0x08]
2376 str r2, [r0]
2377 str r3, [r0, #0x04]
2378 str r1, [r0, #0x08]
2379 RET
2380 LMEMCPY_C_PAD
2381
2382 /*
2383  * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1)
2384 */
2385 ldrb r2, [r1, #0xb] /* r2 = ...B */
2386 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2387 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2388 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2389 #ifdef __ARMEB__
2390 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
2391 str r2, [r0, #0x08]
2392 mov r2, ip, lsr #24 /* r2 = ...7 */
2393 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
2394 mov r1, r1, lsl #8 /* r1 = 012. */
2395 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
2396 #else
2397 mov r2, r2, lsl #24 /* r2 = B... */
2398 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
2399 str r2, [r0, #0x08]
2400 mov r2, ip, lsl #24 /* r2 = 7... */
2401 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
2402 mov r1, r1, lsr #8 /* r1 = .210 */
2403 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
2404 #endif
2405 str r2, [r0, #0x04]
2406 str r1, [r0]
2407 RET
2408 LMEMCPY_C_PAD
2409
2410 /*
2411 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2412 */
2413 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2414 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2415 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2416 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2417 #ifdef __ARMEB__
2418 mov r2, r2, lsl #16 /* r2 = 01.. */
2419 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2420 str r2, [r0]
2421 mov r3, r3, lsl #16 /* r3 = 45.. */
2422 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
2423 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
2424 #else
2425 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2426 str r2, [r0]
2427 mov r3, r3, lsr #16 /* r3 = ..54 */
2428 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
2429 mov r1, r1, lsl #16 /* r1 = BA.. */
2430 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
2431 #endif
2432 str r3, [r0, #0x04]
2433 str r1, [r0, #0x08]
2434 RET
2435 LMEMCPY_C_PAD
2436
2437 /*
2438  * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3)
2439 */
2440 ldrb r2, [r1] /* r2 = ...0 */
2441 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2442 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2443 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2444 #ifdef __ARMEB__
2445 mov r2, r2, lsl #24 /* r2 = 0... */
2446 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
2447 str r2, [r0]
2448 mov r3, r3, lsl #24 /* r3 = 4... */
2449 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
2450 mov r1, r1, lsr #8 /* r1 = .9AB */
2451 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
2452 #else
2453 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
2454 str r2, [r0]
2455 mov r3, r3, lsr #24 /* r3 = ...4 */
2456 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
2457 mov r1, r1, lsl #8 /* r1 = BA9. */
2458 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
2459 #endif
2460 str r3, [r0, #0x04]
2461 str r1, [r0, #0x08]
2462 RET
2463 LMEMCPY_C_PAD
2464
2465 /*
2466 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2467 */
2468 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2469 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2470 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
2471 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
2472 strh r1, [r0, #0x01]
2473 #ifdef __ARMEB__
2474 mov r1, r2, lsr #24 /* r1 = ...0 */
2475 strb r1, [r0]
2476 mov r1, r2, lsl #24 /* r1 = 3... */
2477 	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
2478 mov r1, r3, lsl #24 /* r1 = 7... */
2479 orr r1, r1, ip, lsr #8 /* r1 = 789A */
2480 #else
2481 strb r2, [r0]
2482 mov r1, r2, lsr #24 /* r1 = ...3 */
2483 	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
2484 mov r1, r3, lsr #24 /* r1 = ...7 */
2485 orr r1, r1, ip, lsl #8 /* r1 = A987 */
2486 mov ip, ip, lsr #24 /* ip = ...B */
2487 #endif
2488 str r2, [r0, #0x03]
2489 str r1, [r0, #0x07]
2490 strb ip, [r0, #0x0b]
2491 RET
2492 LMEMCPY_C_PAD
2493
2494 /*
2495 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2496 */
2497 ldrb r2, [r1]
2498 ldrh r3, [r1, #0x01]
2499 ldr ip, [r1, #0x03]
2500 strb r2, [r0]
2501 ldr r2, [r1, #0x07]
2502 ldrb r1, [r1, #0x0b]
2503 strh r3, [r0, #0x01]
2504 str ip, [r0, #0x03]
2505 str r2, [r0, #0x07]
2506 strb r1, [r0, #0x0b]
2507 RET
2508 LMEMCPY_C_PAD
2509
2510 /*
2511 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2512 */
2513 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2514 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2515 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2516 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2517 #ifdef __ARMEB__
2518 mov r2, r2, ror #8 /* r2 = 1..0 */
2519 strb r2, [r0]
2520 mov r2, r2, lsr #16 /* r2 = ..1. */
2521 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
2522 strh r2, [r0, #0x01]
2523 mov r2, r3, lsl #8 /* r2 = 345. */
2524 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
2525 mov r2, ip, lsl #8 /* r2 = 789. */
2526 orr r2, r2, r1, lsr #8 /* r2 = 789A */
2527 #else
2528 strb r2, [r0]
2529 mov r2, r2, lsr #8 /* r2 = ...1 */
2530 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2531 strh r2, [r0, #0x01]
2532 mov r2, r3, lsr #8 /* r2 = .543 */
2533 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
2534 mov r2, ip, lsr #8 /* r2 = .987 */
2535 orr r2, r2, r1, lsl #24 /* r2 = A987 */
2536 mov r1, r1, lsr #8 /* r1 = ...B */
2537 #endif
2538 str r3, [r0, #0x03]
2539 str r2, [r0, #0x07]
2540 strb r1, [r0, #0x0b]
2541 RET
2542 LMEMCPY_C_PAD
2543
2544 /*
2545 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2546 */
2547 ldrb r2, [r1]
2548 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2549 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2550 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2551 strb r2, [r0]
2552 #ifdef __ARMEB__
2553 mov r2, r3, lsr #16 /* r2 = ..12 */
2554 strh r2, [r0, #0x01]
2555 mov r3, r3, lsl #16 /* r3 = 34.. */
2556 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
2557 mov ip, ip, lsl #16 /* ip = 78.. */
2558 orr ip, ip, r1, lsr #16 /* ip = 789A */
2559 mov r1, r1, lsr #8 /* r1 = .9AB */
2560 #else
2561 strh r3, [r0, #0x01]
2562 mov r3, r3, lsr #16 /* r3 = ..43 */
2563 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
2564 mov ip, ip, lsr #16 /* ip = ..87 */
2565 orr ip, ip, r1, lsl #16 /* ip = A987 */
2566 mov r1, r1, lsr #16 /* r1 = ..xB */
2567 #endif
2568 str r3, [r0, #0x03]
2569 str ip, [r0, #0x07]
2570 strb r1, [r0, #0x0b]
2571 RET
2572 LMEMCPY_C_PAD
2573
2574 /*
2575 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2576 */
2577 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
2578 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2579 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
2580 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2581 #ifdef __ARMEB__
2582 strh r1, [r0]
2583 mov r1, ip, lsl #16 /* r1 = 23.. */
2584 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
2585 mov r3, r3, lsl #16 /* r3 = 67.. */
2586 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
2587 #else
2588 strh ip, [r0]
2589 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
2590 mov r3, r3, lsr #16 /* r3 = ..76 */
2591 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
2592 mov r2, r2, lsr #16 /* r2 = ..BA */
2593 #endif
2594 str r1, [r0, #0x02]
2595 str r3, [r0, #0x06]
2596 strh r2, [r0, #0x0a]
2597 RET
2598 LMEMCPY_C_PAD
2599
2600 /*
2601 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2602 */
2603 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2604 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2605 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
2606 strh ip, [r0]
2607 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2608 ldrb r1, [r1, #0x0b] /* r1 = ...B */
2609 #ifdef __ARMEB__
2610 mov r2, r2, lsl #24 /* r2 = 2... */
2611 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
2612 mov r3, r3, lsl #24 /* r3 = 6... */
2613 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
2614 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
2615 #else
2616 mov r2, r2, lsr #24 /* r2 = ...2 */
2617 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2618 mov r3, r3, lsr #24 /* r3 = ...6 */
2619 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2620 mov r1, r1, lsl #8 /* r1 = ..B. */
2621 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2622 #endif
2623 str r2, [r0, #0x02]
2624 str r3, [r0, #0x06]
2625 strh r1, [r0, #0x0a]
2626 RET
2627 LMEMCPY_C_PAD
2628
2629 /*
2630 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2631 */
2632 ldrh r2, [r1]
2633 ldr r3, [r1, #0x02]
2634 ldr ip, [r1, #0x06]
2635 ldrh r1, [r1, #0x0a]
2636 strh r2, [r0]
2637 str r3, [r0, #0x02]
2638 str ip, [r0, #0x06]
2639 strh r1, [r0, #0x0a]
2640 RET
2641 LMEMCPY_C_PAD
2642
2643 /*
2644 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2645 */
2646 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2647 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2648 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2649 strh ip, [r0, #0x0a]
2650 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2651 ldrb r1, [r1] /* r1 = ...0 */
2652 #ifdef __ARMEB__
2653 mov r2, r2, lsr #24 /* r2 = ...9 */
2654 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
2655 mov r3, r3, lsr #24 /* r3 = ...5 */
2656 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
2657 mov r1, r1, lsl #8 /* r1 = ..0. */
2658 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
2659 #else
2660 mov r2, r2, lsl #24 /* r2 = 9... */
2661 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2662 mov r3, r3, lsl #24 /* r3 = 5... */
2663 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2664 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
2665 #endif
2666 str r2, [r0, #0x06]
2667 str r3, [r0, #0x02]
2668 strh r1, [r0]
2669 RET
2670 LMEMCPY_C_PAD
2671
2672 /*
2673 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2674 */
2675 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2676 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2677 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2678 #ifdef __ARMEB__
2679 mov r3, r2, lsr #24 /* r3 = ...0 */
2680 strb r3, [r0]
2681 mov r2, r2, lsl #8 /* r2 = 123. */
2682 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
2683 str r2, [r0, #0x01]
2684 mov r2, ip, lsl #8 /* r2 = 567. */
2685 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
2686 str r2, [r0, #0x05]
2687 mov r2, r1, lsr #8 /* r2 = ..9A */
2688 strh r2, [r0, #0x09]
2689 strb r1, [r0, #0x0b]
2690 #else
2691 strb r2, [r0]
2692 mov r3, r2, lsr #8 /* r3 = .321 */
2693 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2694 str r3, [r0, #0x01]
2695 mov r3, ip, lsr #8 /* r3 = .765 */
2696 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2697 str r3, [r0, #0x05]
2698 mov r1, r1, lsr #8 /* r1 = .BA9 */
2699 strh r1, [r0, #0x09]
2700 mov r1, r1, lsr #16 /* r1 = ...B */
2701 strb r1, [r0, #0x0b]
2702 #endif
2703 RET
2704 LMEMCPY_C_PAD
2705
2706 /*
2707 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2708 */
2709 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2710 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2711 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2712 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2713 strb r2, [r0, #0x0b]
2714 #ifdef __ARMEB__
2715 strh r3, [r0, #0x09]
2716 mov r3, r3, lsr #16 /* r3 = ..78 */
2717 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
2718 mov ip, ip, lsr #16 /* ip = ..34 */
2719 orr ip, ip, r1, lsl #16 /* ip = 1234 */
2720 mov r1, r1, lsr #16 /* r1 = ..x0 */
2721 #else
2722 mov r2, r3, lsr #16 /* r2 = ..A9 */
2723 strh r2, [r0, #0x09]
2724 mov r3, r3, lsl #16 /* r3 = 87.. */
2725 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2726 mov ip, ip, lsl #16 /* ip = 43.. */
2727 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2728 mov r1, r1, lsr #8 /* r1 = .210 */
2729 #endif
2730 str r3, [r0, #0x05]
2731 str ip, [r0, #0x01]
2732 strb r1, [r0]
2733 RET
2734 LMEMCPY_C_PAD
2735
2736 /*
2737 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2738 */
2739 #ifdef __ARMEB__
2740 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
2741 ldr ip, [r1, #0x06] /* ip = 6789 */
2742 ldr r3, [r1, #0x02] /* r3 = 2345 */
2743 ldrh r1, [r1] /* r1 = ..01 */
2744 strb r2, [r0, #0x0b]
2745 mov r2, r2, lsr #8 /* r2 = ...A */
2746 orr r2, r2, ip, lsl #8 /* r2 = 789A */
2747 mov ip, ip, lsr #8 /* ip = .678 */
2748 orr ip, ip, r3, lsl #24 /* ip = 5678 */
2749 mov r3, r3, lsr #8 /* r3 = .234 */
2750 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
2751 mov r1, r1, lsr #8 /* r1 = ...0 */
2752 strb r1, [r0]
2753 str r3, [r0, #0x01]
2754 str ip, [r0, #0x05]
2755 strh r2, [r0, #0x09]
2756 #else
2757 ldrh r2, [r1] /* r2 = ..10 */
2758 ldr r3, [r1, #0x02] /* r3 = 5432 */
2759 ldr ip, [r1, #0x06] /* ip = 9876 */
2760 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2761 strb r2, [r0]
2762 mov r2, r2, lsr #8 /* r2 = ...1 */
2763 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2764 mov r3, r3, lsr #24 /* r3 = ...5 */
2765 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2766 mov ip, ip, lsr #24 /* ip = ...9 */
2767 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2768 mov r1, r1, lsr #8 /* r1 = ...B */
2769 str r2, [r0, #0x01]
2770 str r3, [r0, #0x05]
2771 strh ip, [r0, #0x09]
2772 strb r1, [r0, #0x0b]
2773 #endif
2774 RET
2775 LMEMCPY_C_PAD
2776
2777 /*
2778 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2779 */
2780 ldrb r2, [r1]
2781 ldr r3, [r1, #0x01]
2782 ldr ip, [r1, #0x05]
2783 strb r2, [r0]
2784 ldrh r2, [r1, #0x09]
2785 ldrb r1, [r1, #0x0b]
2786 str r3, [r0, #0x01]
2787 str ip, [r0, #0x05]
2788 strh r2, [r0, #0x09]
2789 strb r1, [r0, #0x0b]
2790 RET
2791 #endif /* __XSCALE__ */
2792
2793 #ifdef GPROF
2794
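/*
 * Profiling boundary labels: when the kernel is built with GPROF,
 * these symbols appear to be used to classify profiling samples, with
 * btrap/etrap bracketing trap handling and bintr/eintr bracketing
 * interrupt handling (mirroring the other ports); the nops just give
 * each label a distinct address.
 */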
2795 ENTRY(user)
2796 nop
2797 ENTRY(btrap)
2798 nop
2799 ENTRY(etrap)
2800 nop
2801 ENTRY(bintr)
2802 nop
2803 ENTRY(eintr)
2804 nop
2805
2806 #endif