FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 __FBSDID("$FreeBSD$");
91
92 #include "assym.inc"
93
94 .syntax unified
95
/*
 * Literal pool: addresses of kernel variables holding optional
 * platform-accelerated bulk-copy/zero hooks and their thresholds.
 * _arm_memcpy / _arm_bzero are function pointers (NULL when no
 * accelerated implementation is installed); _min_memcpy_size /
 * _min_bzero_size are the minimum lengths for which the hooks are
 * worth calling.  NOTE(review): the hooks are presumably installed
 * by platform init code elsewhere — not visible in this file.
 */
.L_arm_memcpy:
.word _C_LABEL(_arm_memcpy)
.L_arm_bzero:
.word _C_LABEL(_arm_bzero)
.L_min_memcpy_size:
.word _C_LABEL(_min_memcpy_size)
.L_min_bzero_size:
.word _C_LABEL(_min_bzero_size)
104 /*
105 * memset: Sets a block of memory to the specified value
106 *
107 * On entry:
108 * r0 - dest address
109 * r1 - byte to write
110 * r2 - number of bytes to write
111 *
112 * On exit:
113 * r0 - dest address
114 */
115 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * void bzero(void *b, size_t len)
 * In:   r0 = buffer, r1 = length in bytes
 * If an accelerated hook (_arm_bzero) is installed and len >= the
 * configured minimum, call it; if the hook reports success (returns 0)
 * we are done, otherwise fall back to the software path.  The software
 * path loads the fill value 0 into r3 and joins memset's do_memset.
 */
116 ENTRY(bzero)
117 ldr r3, .L_arm_bzero
118 ldr r3, [r3]
119 cmp r3, #0
120 beq .Lnormal0 /* no hook installed -> software path */
121 ldr r2, .L_min_bzero_size
122 ldr r2, [r2]
123 cmp r1, r2
124 blt .Lnormal0 /* below threshold -> software path */
125 stmfd sp!, {r0, r1, lr}
126 mov r2, #0 /* third hook arg = 0; TODO confirm hook signature */
127 mov lr, pc /* old-style indirect call: lr = return address */
128 mov pc, r3 /* ... jump to _arm_bzero */
129 cmp r0, #0 /* hook result: 0 = handled */
130 ldmfd sp!, {r0, r1, lr} /* restore args (ldm does not touch flags) */
131 RETeq /* done if the hook succeeded */
/* Hook failed (or absent): zero it in software via memset's core. */
132 .Lnormal0:
133 mov r3, #0x00 /* fill byte = 0 */
134 b do_memset /* r0 = dst, r1 = len, r3 = value */
135 END(bzero)
136 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
137 ENTRY(memset)
138 and r3, r1, #0xff /* We deal with bytes */
139 mov r1, r2
140 do_memset:
141 cmp r1, #0x04 /* Do we have less than 4 bytes */
142 mov ip, r0
143 blt .Lmemset_lessthanfour
144
145 /* Ok first we will word align the address */
146 ands r2, ip, #0x03 /* Get the bottom two bits */
147 bne .Lmemset_wordunaligned /* The address is not word aligned */
148
149 /* We are now word aligned */
150 .Lmemset_wordaligned:
151 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
152 tst ip, #0x04 /* Quad-align for armv5e */
153 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
154 subne r1, r1, #0x04 /* Quad-align if necessary */
155 strne r3, [ip], #0x04
156 cmp r1, #0x10
157 blt .Lmemset_loop4 /* If less than 16 then use words */
158 mov r2, r3 /* Duplicate data */
159 cmp r1, #0x80 /* If < 128 then skip the big loop */
160 blt .Lmemset_loop32
161
162 /* Do 128 bytes at a time */
163 .Lmemset_loop128:
164 subs r1, r1, #0x80
165 strdge r2, [ip], #0x08
166 strdge r2, [ip], #0x08
167 strdge r2, [ip], #0x08
168 strdge r2, [ip], #0x08
169 strdge r2, [ip], #0x08
170 strdge r2, [ip], #0x08
171 strdge r2, [ip], #0x08
172 strdge r2, [ip], #0x08
173 strdge r2, [ip], #0x08
174 strdge r2, [ip], #0x08
175 strdge r2, [ip], #0x08
176 strdge r2, [ip], #0x08
177 strdge r2, [ip], #0x08
178 strdge r2, [ip], #0x08
179 strdge r2, [ip], #0x08
180 strdge r2, [ip], #0x08
181 bgt .Lmemset_loop128
182 RETeq /* Zero length so just exit */
183
184 add r1, r1, #0x80 /* Adjust for extra sub */
185
186 /* Do 32 bytes at a time */
187 .Lmemset_loop32:
188 subs r1, r1, #0x20
189 strdge r2, [ip], #0x08
190 strdge r2, [ip], #0x08
191 strdge r2, [ip], #0x08
192 strdge r2, [ip], #0x08
193 bgt .Lmemset_loop32
194 RETeq /* Zero length so just exit */
195
196 adds r1, r1, #0x10 /* Partially adjust for extra sub */
197
198 /* Deal with 16 bytes or more */
199 strdge r2, [ip], #0x08
200 strdge r2, [ip], #0x08
201 RETeq /* Zero length so just exit */
202
203 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
204
205 /* We have at least 4 bytes so copy as words */
206 .Lmemset_loop4:
207 subs r1, r1, #0x04
208 strge r3, [ip], #0x04
209 bgt .Lmemset_loop4
210 RETeq /* Zero length so just exit */
211
212 /* Compensate for 64-bit alignment check */
213 adds r1, r1, #0x04
214 RETeq
215 cmp r1, #2
216
217 strb r3, [ip], #0x01 /* Set 1 byte */
218 strbge r3, [ip], #0x01 /* Set another byte */
219 strbgt r3, [ip] /* and a third */
220 RET /* Exit */
221
222 .Lmemset_wordunaligned:
223 rsb r2, r2, #0x004
224 strb r3, [ip], #0x01 /* Set 1 byte */
225 cmp r2, #0x02
226 strbge r3, [ip], #0x01 /* Set another byte */
227 sub r1, r1, r2
228 strbgt r3, [ip], #0x01 /* and a third */
229 cmp r1, #0x04 /* More than 4 bytes left? */
230 bge .Lmemset_wordaligned /* Yup */
231
232 .Lmemset_lessthanfour:
233 cmp r1, #0x00
234 RETeq /* Zero length so exit */
235 strb r3, [ip], #0x01 /* Set 1 byte */
236 cmp r1, #0x02
237 strbge r3, [ip], #0x01 /* Set another byte */
238 strbgt r3, [ip] /* and a third */
239 RET /* Exit */
240 EEND(memset)
241 END(bzero)
242
/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 * In:   r0 = b1, r1 = b2, r2 = len
 * Out:  r0 = 0 if equal; otherwise (first differing byte of b1) minus
 *       (corresponding byte of b2), memcmp-style.
 * len == 6 takes a dedicated hand-scheduled path (common for MAC
 * addresses in the network stack — see comment below).  Buffers with
 * matching low address bits are aligned and compared a word at a time;
 * otherwise the comparison is byte-wise.  ip shadows b1 so r0 is free
 * to hold the result.
 */
243 ENTRY(bcmp)
244 mov ip, r0 /* ip = b1 cursor; r0 becomes the result reg */
245 cmp r2, #0x06
246 beq .Lmemcmp_6bytes
247 mov r0, #0x00 /* default result: equal */
248
249 /* Are both addresses aligned the same way? */
250 cmp r2, #0x00
251 eorsne r3, ip, r1 /* r3 = b1 ^ b2 (skipped when len == 0) */
252 RETeq /* len == 0, or same addresses! */
253 tst r3, #0x03 /* do the low 2 address bits agree? */
254 subne r2, r2, #0x01
255 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
256
257 /* Word-align the addresses, if necessary */
/*
 * Computed jump: r3 = 3 * ((b2 - 5) & 3), then pc += r3 << 3, i.e.
 * skip 0/24/48 bytes into the three 6-instruction "compare up to N
 * bytes" groups below.  The layout of those groups is load-bearing —
 * each must remain exactly 6 instructions (24 bytes).
 */
258 sub r3, r1, #0x05
259 ands r3, r3, #0x03
260 add r3, r3, r3, lsl #1
261 addne pc, pc, r3, lsl #3
262 nop
263
264 /* Compare up to 3 bytes */
265 ldrb r0, [ip], #0x01
266 ldrb r3, [r1], #0x01
267 subs r0, r0, r3
268 RETne
269 subs r2, r2, #0x01
270 RETeq
271
272 /* Compare up to 2 bytes */
273 ldrb r0, [ip], #0x01
274 ldrb r3, [r1], #0x01
275 subs r0, r0, r3
276 RETne
277 subs r2, r2, #0x01
278 RETeq
279
280 /* Compare 1 byte */
281 ldrb r0, [ip], #0x01
282 ldrb r3, [r1], #0x01
283 subs r0, r0, r3
284 RETne
285 subs r2, r2, #0x01
286 RETeq
287
288 /* Compare 4 bytes at a time, if possible */
289 subs r2, r2, #0x04
290 bcc .Lmemcmp_bytewise
291 .Lmemcmp_word_aligned:
292 ldr r0, [ip], #0x04
293 ldr r3, [r1], #0x04
294 subs r2, r2, #0x04
295 cmpcs r0, r3 /* only compare while count has not underflowed */
296 beq .Lmemcmp_word_aligned
297 sub r0, r0, r3
298
299 /* Correct for extra subtraction, and check if done */
300 adds r2, r2, #0x04
301 cmpeq r0, #0x00 /* If done, did all bytes match? */
302 RETeq /* Yup. Just return */
303
304 /* Re-do the final word byte-wise (word diff != byte-wise result) */
305 sub ip, ip, #0x04
306 sub r1, r1, #0x04
307
308 .Lmemcmp_bytewise:
309 add r2, r2, #0x03 /* undo the -4 bias, loop consumes 1/iter */
310 .Lmemcmp_bytewise2:
311 ldrb r0, [ip], #0x01
312 ldrb r3, [r1], #0x01
313 subs r2, r2, #0x01
314 cmpcs r0, r3 /* stop on exhausted count or mismatch */
315 beq .Lmemcmp_bytewise2
316 sub r0, r0, r3
317 RET
318
319 /*
320 * 6 byte compares are very common, thanks to the network stack.
321 * This code is hand-scheduled to reduce the number of stalls for
322 * load results. Everything else being equal, this will be ~32%
323 * faster than a byte-wise memcmp.
324 */
325 .align 5
326 .Lmemcmp_6bytes:
327 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
328 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
329 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
330 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
331 ldrbeq r3, [ip, #0x01] /* r3 = b1#1 */
332 RETne /* Return if mismatch on #0 */
333 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
334 ldrbeq r3, [r1, #0x02] /* r3 = b2#2 */
335 ldrbeq r0, [ip, #0x02] /* r0 = b1#2 */
336 RETne /* Return if mismatch on #1 */
337 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
338 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
339 ldrbeq r3, [ip, #0x03] /* r3 = b1#3 */
340 RETne /* Return if mismatch on #2 */
341 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
342 ldrbeq r3, [r1, #0x04] /* r3 = b2#4 */
343 ldrbeq r0, [ip, #0x04] /* r0 = b1#4 */
344 RETne /* Return if mismatch on #3 */
345 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
346 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
347 ldrbeq r3, [ip, #0x05] /* r3 = b1#5 */
348 RETne /* Return if mismatch on #4 */
349 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
350 RET
351 END(bcmp)
352
/*
 * void bcopy(const void *src, void *dst, size_t len)
 * void *memmove(void *dst, const void *src, size_t len)
 * In (memmove): r0 = dst, r1 = src, r2 = len
 * bcopy has (src, dst) argument order, so its entry swaps r0/r1 with
 * the 3-eor trick and falls into memmove.
 * Non-overlapping buffers are handed off to memcpy.  Overlapping
 * buffers are copied forwards when dst < src, backwards when dst > src.
 * Unaligned sources are handled by loading aligned words and merging
 * adjacent words with lsr/lsl shifts (separate paths for source offsets
 * of 1, 2 and 3 bytes, both directions).
 * NOTE(review): memmove's return value — bcopy's forward path pushes
 * {r0, lr} and pops {r0, pc}, restoring dst in r0 on return.
 */
353 ENTRY(bcopy)
354 /* switch the source and destination registers */
355 eor r0, r1, r0
356 eor r1, r0, r1
357 eor r0, r1, r0
358 EENTRY(memmove)
359 /* Do the buffers overlap? */
360 cmp r0, r1
361 RETeq /* Bail now if src/dst are the same */
362 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
363 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
364 cmp r3, r2 /* if (r3 < len) we have an overlap */
365 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
366
367 /* Determine copy direction */
368 cmp r1, r0
369 bcc .Lmemmove_backwards
370
371 moveq r0, #0 /* Quick abort for len=0 */
372 RETeq
373
/* ---- Forward copy (dst < src) ---- */
374 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
375 subs r2, r2, #4
376 blt .Lmemmove_fl4 /* less than 4 bytes */
377 ands r12, r0, #3
378 bne .Lmemmove_fdestul /* oh unaligned destination addr */
379 ands r12, r1, #3
380 bne .Lmemmove_fsrcul /* oh unaligned source addr */
381
382 .Lmemmove_ft8:
383 /* We have aligned source and destination */
384 subs r2, r2, #8
385 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
386 subs r2, r2, #0x14
387 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
388 stmdb sp!, {r4} /* borrow r4 */
389
390 /* blat 32 bytes at a time */
391 /* XXX for really big copies perhaps we should use more registers */
392 .Lmemmove_floop32:
393 ldmia r1!, {r3, r4, r12, lr}
394 stmia r0!, {r3, r4, r12, lr}
395 ldmia r1!, {r3, r4, r12, lr}
396 stmia r0!, {r3, r4, r12, lr}
397 subs r2, r2, #0x20
398 bge .Lmemmove_floop32
399
400 cmn r2, #0x10
401 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
402 stmiage r0!, {r3, r4, r12, lr}
403 subge r2, r2, #0x10
404 ldmia sp!, {r4} /* return r4 */
405
406 .Lmemmove_fl32:
407 adds r2, r2, #0x14
408
409 /* blat 12 bytes at a time */
410 .Lmemmove_floop12:
411 ldmiage r1!, {r3, r12, lr}
412 stmiage r0!, {r3, r12, lr}
413 subsge r2, r2, #0x0c
414 bge .Lmemmove_floop12
415
416 .Lmemmove_fl12:
417 adds r2, r2, #8
418 blt .Lmemmove_fl4
419
420 subs r2, r2, #4
421 ldrlt r3, [r1], #4
422 strlt r3, [r0], #4
423 ldmiage r1!, {r3, r12}
424 stmiage r0!, {r3, r12}
425 subge r2, r2, #4
426
427 .Lmemmove_fl4:
428 /* less than 4 bytes to go */
429 adds r2, r2, #4
430 ldmiaeq sp!, {r0, pc} /* done (restores dest addr into r0) */
431
432 /* copy the crud byte at a time */
433 cmp r2, #2
434 ldrb r3, [r1], #1
435 strb r3, [r0], #1
436 ldrbge r3, [r1], #1
437 strbge r3, [r0], #1
438 ldrbgt r3, [r1], #1
439 strbgt r3, [r0], #1
440 ldmia sp!, {r0, pc}
441
442 /* erg - unaligned destination */
443 .Lmemmove_fdestul:
444 rsb r12, r12, #4 /* r12 = bytes to reach dst alignment */
445 cmp r12, #2
446
447 /* align destination with byte copies */
448 ldrb r3, [r1], #1
449 strb r3, [r0], #1
450 ldrbge r3, [r1], #1
451 strbge r3, [r0], #1
452 ldrbgt r3, [r1], #1
453 strbgt r3, [r0], #1
454 subs r2, r2, r12
455 blt .Lmemmove_fl4 /* less the 4 bytes */
456
457 ands r12, r1, #3
458 beq .Lmemmove_ft8 /* we have an aligned source */
459
460 /* erg - unaligned source */
461 /* This is where it gets nasty ... */
/*
 * Forward, source misaligned by r12 (1..3) bytes: round src down to a
 * word boundary, preload one word into lr, then merge consecutive
 * aligned words with lsr/lsl to synthesize the unaligned stream.
 * One path per misalignment amount; each fixes r1 back up before
 * joining the byte-tail code at .Lmemmove_fl4.
 */
462 .Lmemmove_fsrcul:
463 bic r1, r1, #3
464 ldr lr, [r1], #4
465 cmp r12, #2
466 bgt .Lmemmove_fsrcul3
467 beq .Lmemmove_fsrcul2
468 cmp r2, #0x0c
469 blt .Lmemmove_fsrcul1loop4
470 sub r2, r2, #0x0c
471 stmdb sp!, {r4, r5}
472
473 .Lmemmove_fsrcul1loop16:
474 mov r3, lr, lsr #8
475 ldmia r1!, {r4, r5, r12, lr}
476 orr r3, r3, r4, lsl #24
477 mov r4, r4, lsr #8
478 orr r4, r4, r5, lsl #24
479 mov r5, r5, lsr #8
480 orr r5, r5, r12, lsl #24
481 mov r12, r12, lsr #8
482 orr r12, r12, lr, lsl #24
483 stmia r0!, {r3-r5, r12}
484 subs r2, r2, #0x10
485 bge .Lmemmove_fsrcul1loop16
486 ldmia sp!, {r4, r5}
487 adds r2, r2, #0x0c
488 blt .Lmemmove_fsrcul1l4
489
490 .Lmemmove_fsrcul1loop4:
491 mov r12, lr, lsr #8
492 ldr lr, [r1], #4
493 orr r12, r12, lr, lsl #24
494 str r12, [r0], #4
495 subs r2, r2, #4
496 bge .Lmemmove_fsrcul1loop4
497
498 .Lmemmove_fsrcul1l4:
499 sub r1, r1, #3 /* restore true (unaligned) src pointer */
500 b .Lmemmove_fl4
501
502 .Lmemmove_fsrcul2:
503 cmp r2, #0x0c
504 blt .Lmemmove_fsrcul2loop4
505 sub r2, r2, #0x0c
506 stmdb sp!, {r4, r5}
507
508 .Lmemmove_fsrcul2loop16:
509 mov r3, lr, lsr #16
510 ldmia r1!, {r4, r5, r12, lr}
511 orr r3, r3, r4, lsl #16
512 mov r4, r4, lsr #16
513 orr r4, r4, r5, lsl #16
514 mov r5, r5, lsr #16
515 orr r5, r5, r12, lsl #16
516 mov r12, r12, lsr #16
517 orr r12, r12, lr, lsl #16
518 stmia r0!, {r3-r5, r12}
519 subs r2, r2, #0x10
520 bge .Lmemmove_fsrcul2loop16
521 ldmia sp!, {r4, r5}
522 adds r2, r2, #0x0c
523 blt .Lmemmove_fsrcul2l4
524
525 .Lmemmove_fsrcul2loop4:
526 mov r12, lr, lsr #16
527 ldr lr, [r1], #4
528 orr r12, r12, lr, lsl #16
529 str r12, [r0], #4
530 subs r2, r2, #4
531 bge .Lmemmove_fsrcul2loop4
532
533 .Lmemmove_fsrcul2l4:
534 sub r1, r1, #2 /* restore true (unaligned) src pointer */
535 b .Lmemmove_fl4
536
537 .Lmemmove_fsrcul3:
538 cmp r2, #0x0c
539 blt .Lmemmove_fsrcul3loop4
540 sub r2, r2, #0x0c
541 stmdb sp!, {r4, r5}
542
543 .Lmemmove_fsrcul3loop16:
544 mov r3, lr, lsr #24
545 ldmia r1!, {r4, r5, r12, lr}
546 orr r3, r3, r4, lsl #8
547 mov r4, r4, lsr #24
548 orr r4, r4, r5, lsl #8
549 mov r5, r5, lsr #24
550 orr r5, r5, r12, lsl #8
551 mov r12, r12, lsr #24
552 orr r12, r12, lr, lsl #8
553 stmia r0!, {r3-r5, r12}
554 subs r2, r2, #0x10
555 bge .Lmemmove_fsrcul3loop16
556 ldmia sp!, {r4, r5}
557 adds r2, r2, #0x0c
558 blt .Lmemmove_fsrcul3l4
559
560 .Lmemmove_fsrcul3loop4:
561 mov r12, lr, lsr #24
562 ldr lr, [r1], #4
563 orr r12, r12, lr, lsl #8
564 str r12, [r0], #4
565 subs r2, r2, #4
566 bge .Lmemmove_fsrcul3loop4
567
568 .Lmemmove_fsrcul3l4:
569 sub r1, r1, #1 /* restore true (unaligned) src pointer */
570 b .Lmemmove_fl4
571
/*
 * ---- Backward copy (dst > src, overlapping) ----
 * Pointers are moved to the end of both buffers and the copy proceeds
 * with pre-decrement (ldmdb/stmdb, [rN, #-x]!) addressing.  r0 is
 * decremented back to dst by the end, so the plain RETs return dst.
 */
572 .Lmemmove_backwards:
573 add r1, r1, r2
574 add r0, r0, r2
575 subs r2, r2, #4
576 blt .Lmemmove_bl4 /* less than 4 bytes */
577 ands r12, r0, #3
578 bne .Lmemmove_bdestul /* oh unaligned destination addr */
579 ands r12, r1, #3
580 bne .Lmemmove_bsrcul /* oh unaligned source addr */
581
582 .Lmemmove_bt8:
583 /* We have aligned source and destination */
584 subs r2, r2, #8
585 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
586 stmdb sp!, {r4, lr}
587 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
588 blt .Lmemmove_bl32
589
590 /* blat 32 bytes at a time */
591 /* XXX for really big copies perhaps we should use more registers */
592 .Lmemmove_bloop32:
593 ldmdb r1!, {r3, r4, r12, lr}
594 stmdb r0!, {r3, r4, r12, lr}
595 ldmdb r1!, {r3, r4, r12, lr}
596 stmdb r0!, {r3, r4, r12, lr}
597 subs r2, r2, #0x20
598 bge .Lmemmove_bloop32
599
600 .Lmemmove_bl32:
601 cmn r2, #0x10
602 ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
603 stmdbge r0!, {r3, r4, r12, lr}
604 subge r2, r2, #0x10
605 adds r2, r2, #0x14
606 ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
607 stmdbge r0!, {r3, r12, lr}
608 subge r2, r2, #0x0c
609 ldmia sp!, {r4, lr}
610
611 .Lmemmove_bl12:
612 adds r2, r2, #8
613 blt .Lmemmove_bl4
614 subs r2, r2, #4
615 ldrlt r3, [r1, #-4]!
616 strlt r3, [r0, #-4]!
617 ldmdbge r1!, {r3, r12}
618 stmdbge r0!, {r3, r12}
619 subge r2, r2, #4
620
621 .Lmemmove_bl4:
622 /* less than 4 bytes to go */
623 adds r2, r2, #4
624 RETeq /* done */
625
626 /* copy the crud byte at a time */
627 cmp r2, #2
628 ldrb r3, [r1, #-1]!
629 strb r3, [r0, #-1]!
630 ldrbge r3, [r1, #-1]!
631 strbge r3, [r0, #-1]!
632 ldrbgt r3, [r1, #-1]!
633 strbgt r3, [r0, #-1]!
634 RET
635
636 /* erg - unaligned destination */
637 .Lmemmove_bdestul:
638 cmp r12, #2
639
640 /* align destination with byte copies */
641 ldrb r3, [r1, #-1]!
642 strb r3, [r0, #-1]!
643 ldrbge r3, [r1, #-1]!
644 strbge r3, [r0, #-1]!
645 ldrbgt r3, [r1, #-1]!
646 strbgt r3, [r0, #-1]!
647 subs r2, r2, r12
648 blt .Lmemmove_bl4 /* less than 4 bytes to go */
649 ands r12, r1, #3
650 beq .Lmemmove_bt8 /* we have an aligned source */
651
652 /* erg - unaligned source */
653 /* This is where it gets nasty ... */
/*
 * Backward counterpart of the fsrcul paths: round src down to a word
 * boundary, preload the word at the boundary into r3, then merge
 * downward-adjacent aligned words with lsl/lsr shifts.  Each path
 * fixes r1 forward by its misalignment before joining .Lmemmove_bl4.
 */
654 .Lmemmove_bsrcul:
655 bic r1, r1, #3
656 ldr r3, [r1, #0]
657 cmp r12, #2
658 blt .Lmemmove_bsrcul1
659 beq .Lmemmove_bsrcul2
660 cmp r2, #0x0c
661 blt .Lmemmove_bsrcul3loop4
662 sub r2, r2, #0x0c
663 stmdb sp!, {r4, r5, lr}
664
665 .Lmemmove_bsrcul3loop16:
666 mov lr, r3, lsl #8
667 ldmdb r1!, {r3-r5, r12}
668 orr lr, lr, r12, lsr #24
669 mov r12, r12, lsl #8
670 orr r12, r12, r5, lsr #24
671 mov r5, r5, lsl #8
672 orr r5, r5, r4, lsr #24
673 mov r4, r4, lsl #8
674 orr r4, r4, r3, lsr #24
675 stmdb r0!, {r4, r5, r12, lr}
676 subs r2, r2, #0x10
677 bge .Lmemmove_bsrcul3loop16
678 ldmia sp!, {r4, r5, lr}
679 adds r2, r2, #0x0c
680 blt .Lmemmove_bsrcul3l4
681
682 .Lmemmove_bsrcul3loop4:
683 mov r12, r3, lsl #8
684 ldr r3, [r1, #-4]!
685 orr r12, r12, r3, lsr #24
686 str r12, [r0, #-4]!
687 subs r2, r2, #4
688 bge .Lmemmove_bsrcul3loop4
689
690 .Lmemmove_bsrcul3l4:
691 add r1, r1, #3 /* restore true (unaligned) src pointer */
692 b .Lmemmove_bl4
693
694 .Lmemmove_bsrcul2:
695 cmp r2, #0x0c
696 blt .Lmemmove_bsrcul2loop4
697 sub r2, r2, #0x0c
698 stmdb sp!, {r4, r5, lr}
699
700 .Lmemmove_bsrcul2loop16:
701 mov lr, r3, lsl #16
702 ldmdb r1!, {r3-r5, r12}
703 orr lr, lr, r12, lsr #16
704 mov r12, r12, lsl #16
705 orr r12, r12, r5, lsr #16
706 mov r5, r5, lsl #16
707 orr r5, r5, r4, lsr #16
708 mov r4, r4, lsl #16
709 orr r4, r4, r3, lsr #16
710 stmdb r0!, {r4, r5, r12, lr}
711 subs r2, r2, #0x10
712 bge .Lmemmove_bsrcul2loop16
713 ldmia sp!, {r4, r5, lr}
714 adds r2, r2, #0x0c
715 blt .Lmemmove_bsrcul2l4
716
717 .Lmemmove_bsrcul2loop4:
718 mov r12, r3, lsl #16
719 ldr r3, [r1, #-4]!
720 orr r12, r12, r3, lsr #16
721 str r12, [r0, #-4]!
722 subs r2, r2, #4
723 bge .Lmemmove_bsrcul2loop4
724
725 .Lmemmove_bsrcul2l4:
726 add r1, r1, #2 /* restore true (unaligned) src pointer */
727 b .Lmemmove_bl4
728
729 .Lmemmove_bsrcul1:
730 cmp r2, #0x0c
731 blt .Lmemmove_bsrcul1loop4
732 sub r2, r2, #0x0c
733 stmdb sp!, {r4, r5, lr}
734
735 .Lmemmove_bsrcul1loop32:
736 mov lr, r3, lsl #24
737 ldmdb r1!, {r3-r5, r12}
738 orr lr, lr, r12, lsr #8
739 mov r12, r12, lsl #24
740 orr r12, r12, r5, lsr #8
741 mov r5, r5, lsl #24
742 orr r5, r5, r4, lsr #8
743 mov r4, r4, lsl #24
744 orr r4, r4, r3, lsr #8
745 stmdb r0!, {r4, r5, r12, lr}
746 subs r2, r2, #0x10
747 bge .Lmemmove_bsrcul1loop32
748 ldmia sp!, {r4, r5, lr}
749 adds r2, r2, #0x0c
750 blt .Lmemmove_bsrcul1l4
751
752 .Lmemmove_bsrcul1loop4:
753 mov r12, r3, lsl #24
754 ldr r3, [r1, #-4]!
755 orr r12, r12, r3, lsr #8
756 str r12, [r0, #-4]!
757 subs r2, r2, #4
758 bge .Lmemmove_bsrcul1loop4
759
760 .Lmemmove_bsrcul1l4:
761 add r1, r1, #1 /* restore true (unaligned) src pointer */
762 b .Lmemmove_bl4
763 EEND(memmove)
764 END(bcopy)
765
766 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
767 ENTRY(memcpy)
768 pld [r1]
769 cmp r2, #0x0c
770 ble .Lmemcpy_short /* <= 12 bytes */
771 #ifdef FLASHADDR
772 #if FLASHADDR > PHYSADDR
773 ldr r3, =FLASHADDR
774 cmp r3, pc
775 bls .Lnormal
776 #else
777 ldr r3, =FLASHADDR
778 cmp r3, pc
779 bhi .Lnormal
780 #endif
781 #endif
782 ldr r3, .L_arm_memcpy
783 ldr r3, [r3]
784 cmp r3, #0
785 beq .Lnormal
786 ldr r3, .L_min_memcpy_size
787 ldr r3, [r3]
788 cmp r2, r3
789 blt .Lnormal
790 stmfd sp!, {r0-r2, r4, lr}
791 mov r3, #0
792 ldr r4, .L_arm_memcpy
793 mov lr, pc
794 ldr pc, [r4]
795 cmp r0, #0
796 ldmfd sp!, {r0-r2, r4, lr}
797 RETeq
798 .Lnormal:
799 mov r3, r0 /* We must not clobber r0 */
800
801 /* Word-align the destination buffer */
802 ands ip, r3, #0x03 /* Already word aligned? */
803 beq .Lmemcpy_wordaligned /* Yup */
804 cmp ip, #0x02
805 ldrb ip, [r1], #0x01
806 sub r2, r2, #0x01
807 strb ip, [r3], #0x01
808 ldrble ip, [r1], #0x01
809 suble r2, r2, #0x01
810 strble ip, [r3], #0x01
811 ldrblt ip, [r1], #0x01
812 sublt r2, r2, #0x01
813 strblt ip, [r3], #0x01
814
815 /* Destination buffer is now word aligned */
816 .Lmemcpy_wordaligned:
817 ands ip, r1, #0x03 /* Is src also word-aligned? */
818 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
819
820 /* Quad-align the destination buffer */
821 tst r3, #0x07 /* Already quad aligned? */
822 ldrne ip, [r1], #0x04
823 stmfd sp!, {r4-r9} /* Free up some registers */
824 subne r2, r2, #0x04
825 strne ip, [r3], #0x04
826
827 /* Destination buffer quad aligned, source is at least word aligned */
828 subs r2, r2, #0x80
829 blt .Lmemcpy_w_lessthan128
830
831 /* Copy 128 bytes at a time */
832 .Lmemcpy_w_loop128:
833 ldr r4, [r1], #0x04 /* LD:00-03 */
834 ldr r5, [r1], #0x04 /* LD:04-07 */
835 pld [r1, #0x18] /* Prefetch 0x20 */
836 ldr r6, [r1], #0x04 /* LD:08-0b */
837 ldr r7, [r1], #0x04 /* LD:0c-0f */
838 ldr r8, [r1], #0x04 /* LD:10-13 */
839 ldr r9, [r1], #0x04 /* LD:14-17 */
840 strd r4, [r3], #0x08 /* ST:00-07 */
841 ldr r4, [r1], #0x04 /* LD:18-1b */
842 ldr r5, [r1], #0x04 /* LD:1c-1f */
843 strd r6, [r3], #0x08 /* ST:08-0f */
844 ldr r6, [r1], #0x04 /* LD:20-23 */
845 ldr r7, [r1], #0x04 /* LD:24-27 */
846 pld [r1, #0x18] /* Prefetch 0x40 */
847 strd r8, [r3], #0x08 /* ST:10-17 */
848 ldr r8, [r1], #0x04 /* LD:28-2b */
849 ldr r9, [r1], #0x04 /* LD:2c-2f */
850 strd r4, [r3], #0x08 /* ST:18-1f */
851 ldr r4, [r1], #0x04 /* LD:30-33 */
852 ldr r5, [r1], #0x04 /* LD:34-37 */
853 strd r6, [r3], #0x08 /* ST:20-27 */
854 ldr r6, [r1], #0x04 /* LD:38-3b */
855 ldr r7, [r1], #0x04 /* LD:3c-3f */
856 strd r8, [r3], #0x08 /* ST:28-2f */
857 ldr r8, [r1], #0x04 /* LD:40-43 */
858 ldr r9, [r1], #0x04 /* LD:44-47 */
859 pld [r1, #0x18] /* Prefetch 0x60 */
860 strd r4, [r3], #0x08 /* ST:30-37 */
861 ldr r4, [r1], #0x04 /* LD:48-4b */
862 ldr r5, [r1], #0x04 /* LD:4c-4f */
863 strd r6, [r3], #0x08 /* ST:38-3f */
864 ldr r6, [r1], #0x04 /* LD:50-53 */
865 ldr r7, [r1], #0x04 /* LD:54-57 */
866 strd r8, [r3], #0x08 /* ST:40-47 */
867 ldr r8, [r1], #0x04 /* LD:58-5b */
868 ldr r9, [r1], #0x04 /* LD:5c-5f */
869 strd r4, [r3], #0x08 /* ST:48-4f */
870 ldr r4, [r1], #0x04 /* LD:60-63 */
871 ldr r5, [r1], #0x04 /* LD:64-67 */
872 pld [r1, #0x18] /* Prefetch 0x80 */
873 strd r6, [r3], #0x08 /* ST:50-57 */
874 ldr r6, [r1], #0x04 /* LD:68-6b */
875 ldr r7, [r1], #0x04 /* LD:6c-6f */
876 strd r8, [r3], #0x08 /* ST:58-5f */
877 ldr r8, [r1], #0x04 /* LD:70-73 */
878 ldr r9, [r1], #0x04 /* LD:74-77 */
879 strd r4, [r3], #0x08 /* ST:60-67 */
880 ldr r4, [r1], #0x04 /* LD:78-7b */
881 ldr r5, [r1], #0x04 /* LD:7c-7f */
882 strd r6, [r3], #0x08 /* ST:68-6f */
883 strd r8, [r3], #0x08 /* ST:70-77 */
884 subs r2, r2, #0x80
885 strd r4, [r3], #0x08 /* ST:78-7f */
886 bge .Lmemcpy_w_loop128
887
888 .Lmemcpy_w_lessthan128:
889 adds r2, r2, #0x80 /* Adjust for extra sub */
890 ldmfdeq sp!, {r4-r9}
891 RETeq /* Return now if done */
892 subs r2, r2, #0x20
893 blt .Lmemcpy_w_lessthan32
894
895 /* Copy 32 bytes at a time */
896 .Lmemcpy_w_loop32:
897 ldr r4, [r1], #0x04
898 ldr r5, [r1], #0x04
899 pld [r1, #0x18]
900 ldr r6, [r1], #0x04
901 ldr r7, [r1], #0x04
902 ldr r8, [r1], #0x04
903 ldr r9, [r1], #0x04
904 strd r4, [r3], #0x08
905 ldr r4, [r1], #0x04
906 ldr r5, [r1], #0x04
907 strd r6, [r3], #0x08
908 strd r8, [r3], #0x08
909 subs r2, r2, #0x20
910 strd r4, [r3], #0x08
911 bge .Lmemcpy_w_loop32
912
913 .Lmemcpy_w_lessthan32:
914 adds r2, r2, #0x20 /* Adjust for extra sub */
915 ldmfdeq sp!, {r4-r9}
916 RETeq /* Return now if done */
917
918 and r4, r2, #0x18
919 rsbs r4, r4, #0x18
920 addne pc, pc, r4, lsl #1
921 nop
922
923 /* At least 24 bytes remaining */
924 ldr r4, [r1], #0x04
925 ldr r5, [r1], #0x04
926 sub r2, r2, #0x08
927 strd r4, [r3], #0x08
928
929 /* At least 16 bytes remaining */
930 ldr r4, [r1], #0x04
931 ldr r5, [r1], #0x04
932 sub r2, r2, #0x08
933 strd r4, [r3], #0x08
934
935 /* At least 8 bytes remaining */
936 ldr r4, [r1], #0x04
937 ldr r5, [r1], #0x04
938 subs r2, r2, #0x08
939 strd r4, [r3], #0x08
940
941 /* Less than 8 bytes remaining */
942 ldmfd sp!, {r4-r9}
943 RETeq /* Return now if done */
944 subs r2, r2, #0x04
945 ldrge ip, [r1], #0x04
946 strge ip, [r3], #0x04
947 RETeq /* Return now if done */
948 addlt r2, r2, #0x04
949 ldrb ip, [r1], #0x01
950 cmp r2, #0x02
951 ldrbge r2, [r1], #0x01
952 strb ip, [r3], #0x01
953 ldrbgt ip, [r1]
954 strbge r2, [r3], #0x01
955 strbgt ip, [r3]
956 RET
957 /* Place a literal pool here for the above ldr instructions to use */
958 .ltorg
959
960
961 /*
962 * At this point, it has not been possible to word align both buffers.
963 * The destination buffer is word aligned, but the source buffer is not.
964 */
965 .Lmemcpy_bad_align:
966 stmfd sp!, {r4-r7}
967 bic r1, r1, #0x03
968 cmp ip, #2
969 ldr ip, [r1], #0x04
970 bgt .Lmemcpy_bad3
971 beq .Lmemcpy_bad2
972 b .Lmemcpy_bad1
973
974 .Lmemcpy_bad1_loop16:
975 mov r4, ip, lsr #8
976 ldr r5, [r1], #0x04
977 pld [r1, #0x018]
978 ldr r6, [r1], #0x04
979 ldr r7, [r1], #0x04
980 ldr ip, [r1], #0x04
981 orr r4, r4, r5, lsl #24
982 mov r5, r5, lsr #8
983 orr r5, r5, r6, lsl #24
984 mov r6, r6, lsr #8
985 orr r6, r6, r7, lsl #24
986 mov r7, r7, lsr #8
987 orr r7, r7, ip, lsl #24
988 str r4, [r3], #0x04
989 str r5, [r3], #0x04
990 str r6, [r3], #0x04
991 str r7, [r3], #0x04
992 .Lmemcpy_bad1:
993 subs r2, r2, #0x10
994 bge .Lmemcpy_bad1_loop16
995
996 adds r2, r2, #0x10
997 ldmfdeq sp!, {r4-r7}
998 RETeq /* Return now if done */
999 subs r2, r2, #0x04
1000 sublt r1, r1, #0x03
1001 blt .Lmemcpy_bad_done
1002
1003 .Lmemcpy_bad1_loop4:
1004 mov r4, ip, lsr #8
1005 ldr ip, [r1], #0x04
1006 subs r2, r2, #0x04
1007 orr r4, r4, ip, lsl #24
1008 str r4, [r3], #0x04
1009 bge .Lmemcpy_bad1_loop4
1010 sub r1, r1, #0x03
1011 b .Lmemcpy_bad_done
1012
1013 .Lmemcpy_bad2_loop16:
1014 mov r4, ip, lsr #16
1015 ldr r5, [r1], #0x04
1016 pld [r1, #0x018]
1017 ldr r6, [r1], #0x04
1018 ldr r7, [r1], #0x04
1019 ldr ip, [r1], #0x04
1020 orr r4, r4, r5, lsl #16
1021 mov r5, r5, lsr #16
1022 orr r5, r5, r6, lsl #16
1023 mov r6, r6, lsr #16
1024 orr r6, r6, r7, lsl #16
1025 mov r7, r7, lsr #16
1026 orr r7, r7, ip, lsl #16
1027 str r4, [r3], #0x04
1028 str r5, [r3], #0x04
1029 str r6, [r3], #0x04
1030 str r7, [r3], #0x04
1031 .Lmemcpy_bad2:
1032 subs r2, r2, #0x10
1033 bge .Lmemcpy_bad2_loop16
1034
1035 adds r2, r2, #0x10
1036 ldmfdeq sp!, {r4-r7}
1037 RETeq /* Return now if done */
1038 subs r2, r2, #0x04
1039 sublt r1, r1, #0x02
1040 blt .Lmemcpy_bad_done
1041
1042 .Lmemcpy_bad2_loop4:
1043 mov r4, ip, lsr #16
1044 ldr ip, [r1], #0x04
1045 subs r2, r2, #0x04
1046 orr r4, r4, ip, lsl #16
1047 str r4, [r3], #0x04
1048 bge .Lmemcpy_bad2_loop4
1049 sub r1, r1, #0x02
1050 b .Lmemcpy_bad_done
1051
1052 .Lmemcpy_bad3_loop16:
1053 mov r4, ip, lsr #24
1054 ldr r5, [r1], #0x04
1055 pld [r1, #0x018]
1056 ldr r6, [r1], #0x04
1057 ldr r7, [r1], #0x04
1058 ldr ip, [r1], #0x04
1059 orr r4, r4, r5, lsl #8
1060 mov r5, r5, lsr #24
1061 orr r5, r5, r6, lsl #8
1062 mov r6, r6, lsr #24
1063 orr r6, r6, r7, lsl #8
1064 mov r7, r7, lsr #24
1065 orr r7, r7, ip, lsl #8
1066 str r4, [r3], #0x04
1067 str r5, [r3], #0x04
1068 str r6, [r3], #0x04
1069 str r7, [r3], #0x04
1070 .Lmemcpy_bad3:
1071 subs r2, r2, #0x10
1072 bge .Lmemcpy_bad3_loop16
1073
1074 adds r2, r2, #0x10
1075 ldmfdeq sp!, {r4-r7}
1076 RETeq /* Return now if done */
1077 subs r2, r2, #0x04
1078 sublt r1, r1, #0x01
1079 blt .Lmemcpy_bad_done
1080
1081 .Lmemcpy_bad3_loop4:
1082 mov r4, ip, lsr #24
1083 ldr ip, [r1], #0x04
1084 subs r2, r2, #0x04
1085 orr r4, r4, ip, lsl #8
1086 str r4, [r3], #0x04
1087 bge .Lmemcpy_bad3_loop4
1088 sub r1, r1, #0x01
1089
1090 .Lmemcpy_bad_done:
1091 ldmfd sp!, {r4-r7}
1092 adds r2, r2, #0x04
1093 RETeq
1094 ldrb ip, [r1], #0x01
1095 cmp r2, #0x02
1096 ldrbge r2, [r1], #0x01
1097 strb ip, [r3], #0x01
1098 ldrbgt ip, [r1]
1099 strbge r2, [r3], #0x01
1100 strbgt ip, [r3]
1101 RET
1102
1103
1104 /*
1105 * Handle short copies (less than 16 bytes), possibly misaligned.
1106 * Some of these are *very* common, thanks to the network stack,
1107 * and so are handled specially.
1108 */
1109 .Lmemcpy_short:
/*
 * Computed jump on the byte count in r2.  In ARM state the pc reads
 * as the current instruction's address + 8, so with the nop below
 * this lands on table entry r2.  The table covers counts 0x00-0x0c;
 * NOTE(review): the dispatch test that enters here is above this
 * chunk — confirm it only comes here for counts the table covers.
 */
1110 add pc, pc, r2, lsl #2
1111 nop
1112 RET /* 0x00 */
1113 b .Lmemcpy_bytewise /* 0x01 */
1114 b .Lmemcpy_bytewise /* 0x02 */
1115 b .Lmemcpy_bytewise /* 0x03 */
1116 b .Lmemcpy_4 /* 0x04 */
1117 b .Lmemcpy_bytewise /* 0x05 */
1118 b .Lmemcpy_6 /* 0x06 */
1119 b .Lmemcpy_bytewise /* 0x07 */
1120 b .Lmemcpy_8 /* 0x08 */
1121 b .Lmemcpy_bytewise /* 0x09 */
1122 b .Lmemcpy_bytewise /* 0x0a */
1123 b .Lmemcpy_bytewise /* 0x0b */
1124 b .Lmemcpy_c /* 0x0c */
1125 .Lmemcpy_bytewise:
/* Simple byte loop; r2 >= 1 here (count 0 returned from the table). */
1126 mov r3, r0 /* We must not clobber r0 (the return value) */
1127 ldrb ip, [r1], #0x01
1128 1: subs r2, r2, #0x01
1129 strb ip, [r3], #0x01
1130 ldrbne ip, [r1], #0x01
1131 bne 1b
1132 RET
1133
1134 /******************************************************************************
1135 * Special case for 4 byte copies
1136 */
1137 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1138 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1139 LMEMCPY_4_PAD
1140 .Lmemcpy_4:
/*
 * Dispatch on the alignment tuple r2 = ((dst & 3) << 2) | (src & 3).
 * The pc reads as instruction address + 8, so `sub r3, pc, #0x14`
 * yields the address of .Lmemcpy_4 itself; case N then lives at
 * .Lmemcpy_4 + (N << LMEMCPY_4_LOG2) thanks to the 64-byte padding
 * between cases.  Case 0000 falls through (ands set the Z flag).
 * The byte-diagram comments ("r3 = 3210" etc.) number the bytes of
 * the 4-byte datum 0-3, byte 0 being the lowest-addressed; the
 * arithmetic annotations track the little-endian register layout.
 */
1141 and r2, r1, #0x03
1142 orr r2, r2, r0, lsl #2
1143 ands r2, r2, #0x0f
1144 sub r3, pc, #0x14
1145 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1146
1147 /*
1148 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1149 */
1150 ldr r2, [r1]
1151 str r2, [r0]
1152 RET
1153 LMEMCPY_4_PAD
1154
1155 /*
1156 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1157 */
1158 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1159 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1160 mov r3, r3, lsr #8 /* r3 = .210 */
1161 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1162 str r3, [r0]
1163 RET
1164 LMEMCPY_4_PAD
1165
1166 /*
1167 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1168 */
1169 ldrh r3, [r1, #0x02]
1170 ldrh r2, [r1]
1171 orr r3, r2, r3, lsl #16
1172 str r3, [r0]
1173 RET
1174 LMEMCPY_4_PAD
1175
1176 /*
1177 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1178 */
1179 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1180 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1181 mov r3, r3, lsr #24 /* r3 = ...0 */
1182 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1183 str r3, [r0]
1184 RET
1185 LMEMCPY_4_PAD
1186
1187 /*
1188 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1189 */
1190 ldr r2, [r1]
1191 strb r2, [r0]
1192 mov r3, r2, lsr #8
1193 mov r1, r2, lsr #24
1194 strb r1, [r0, #0x03]
1195 strh r3, [r0, #0x01]
1196 RET
1197 LMEMCPY_4_PAD
1198
1199 /*
1200 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1201 */
1202 ldrb r2, [r1]
1203 ldrh r3, [r1, #0x01]
1204 ldrb r1, [r1, #0x03]
1205 strb r2, [r0]
1206 strh r3, [r0, #0x01]
1207 strb r1, [r0, #0x03]
1208 RET
1209 LMEMCPY_4_PAD
1210
1211 /*
1212 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1213 */
1214 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1215 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1216 strb r2, [r0]
1217 mov r2, r2, lsr #8 /* r2 = ...1 */
1218 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1219 mov r3, r3, lsr #8 /* r3 = ...3 */
1220 strh r2, [r0, #0x01]
1221 strb r3, [r0, #0x03]
1222 RET
1223 LMEMCPY_4_PAD
1224
1225 /*
1226 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1227 */
1228 ldrb r2, [r1]
1229 ldrh r3, [r1, #0x01]
1230 ldrb r1, [r1, #0x03]
1231 strb r2, [r0]
1232 strh r3, [r0, #0x01]
1233 strb r1, [r0, #0x03]
1234 RET
1235 LMEMCPY_4_PAD
1236
1237 /*
1238 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1239 */
1240 ldr r2, [r1]
1241 strh r2, [r0]
1242 mov r3, r2, lsr #16
1243 strh r3, [r0, #0x02]
1244 RET
1245 LMEMCPY_4_PAD
1246
1247 /*
1248 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1249 */
1250 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1251 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1252 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1253 strh r1, [r0]
1254 mov r2, r2, lsr #24 /* r2 = ...2 */
1255 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1256 strh r2, [r0, #0x02]
1257 RET
1258 LMEMCPY_4_PAD
1259
1260 /*
1261 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1262 */
1263 ldrh r2, [r1]
1264 ldrh r3, [r1, #0x02]
1265 strh r2, [r0]
1266 strh r3, [r0, #0x02]
1267 RET
1268 LMEMCPY_4_PAD
1269
1270 /*
1271 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1272 */
1273 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1274 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1275 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1276 strh r1, [r0, #0x02]
1277 mov r3, r3, lsl #8 /* r3 = 321. */
1278 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1279 strh r3, [r0]
1280 RET
1281 LMEMCPY_4_PAD
1282
1283 /*
1284 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1285 */
1286 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1287 strb r2, [r0]
1288 mov r3, r2, lsr #8
1289 mov r1, r2, lsr #24
1290 strh r3, [r0, #0x01]
1291 strb r1, [r0, #0x03]
1292 RET
1293 LMEMCPY_4_PAD
1294
1295 /*
1296 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1297 */
1298 ldrb r2, [r1]
1299 ldrh r3, [r1, #0x01]
1300 ldrb r1, [r1, #0x03]
1301 strb r2, [r0]
1302 strh r3, [r0, #0x01]
1303 strb r1, [r0, #0x03]
1304 RET
1305 LMEMCPY_4_PAD
1306
1307 /*
1308 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1309 */
1310 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1311 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1312 strb r2, [r0]
1313 mov r2, r2, lsr #8 /* r2 = ...1 */
1314 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1315 strh r2, [r0, #0x01]
1316 mov r3, r3, lsr #8 /* r3 = ...3 */
1317 strb r3, [r0, #0x03]
1318 RET
1319 LMEMCPY_4_PAD
1320
1321 /*
1322 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1323 */
1324 ldrb r2, [r1]
1325 ldrh r3, [r1, #0x01]
1326 ldrb r1, [r1, #0x03]
1327 strb r2, [r0]
1328 strh r3, [r0, #0x01]
1329 strb r1, [r0, #0x03]
1330 RET
1331 LMEMCPY_4_PAD
1332
1333
1334 /******************************************************************************
1335 * Special case for 6 byte copies
1336 */
1337 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1338 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1339 LMEMCPY_6_PAD
1340 .Lmemcpy_6:
/*
 * Dispatch on r2 = ((dst & 3) << 2) | (src & 3); same mechanism as
 * .Lmemcpy_4: pc reads as instruction address + 8, so the sub yields
 * the address of .Lmemcpy_6 and case N lives at .Lmemcpy_6 + N * 64.
 * Case 0000 falls through.  Bytes of the 6-byte datum are numbered
 * 0-5 in the diagram comments, 0 = lowest-addressed.
 */
1341 and r2, r1, #0x03
1342 orr r2, r2, r0, lsl #2
1343 ands r2, r2, #0x0f
1344 sub r3, pc, #0x14
1345 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1346
1347 /*
1348 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1349 */
1350 ldr r2, [r1]
1351 ldrh r3, [r1, #0x04]
1352 str r2, [r0]
1353 strh r3, [r0, #0x04]
1354 RET
1355 LMEMCPY_6_PAD
1356
1357 /*
1358 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1359 */
1360 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1361 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1362 mov r2, r2, lsr #8 /* r2 = .210 */
1363 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1364 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1365 str r2, [r0]
1366 strh r3, [r0, #0x04]
1367 RET
1368 LMEMCPY_6_PAD
1369
1370 /*
1371 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1372 */
1373 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1374 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1375 mov r1, r3, lsr #16 /* r1 = ..54 */
1376 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1377 str r2, [r0]
1378 strh r1, [r0, #0x04]
1379 RET
1380 LMEMCPY_6_PAD
1381
1382 /*
1383 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1384 */
1385 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1386 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1387 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
1388 mov r2, r2, lsr #24 /* r2 = ...0 */
1389 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1390 mov r1, r1, lsl #8 /* r1 = xx5. */
1391 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1392 str r2, [r0]
1393 strh r1, [r0, #0x04]
1394 RET
1395 LMEMCPY_6_PAD
1396
1397 /*
1398 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1399 */
1400 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1401 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1402 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1403 strh r1, [r0, #0x01]
1404 strb r3, [r0]
1405 mov r3, r3, lsr #24 /* r3 = ...3 */
1406 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1407 mov r2, r2, lsr #8 /* r2 = ...5 */
1408 strh r3, [r0, #0x03]
1409 strb r2, [r0, #0x05]
1410 RET
1411 LMEMCPY_6_PAD
1412
1413 /*
1414 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1415 */
1416 ldrb r2, [r1]
1417 ldrh r3, [r1, #0x01]
1418 ldrh ip, [r1, #0x03]
1419 ldrb r1, [r1, #0x05]
1420 strb r2, [r0]
1421 strh r3, [r0, #0x01]
1422 strh ip, [r0, #0x03]
1423 strb r1, [r0, #0x05]
1424 RET
1425 LMEMCPY_6_PAD
1426
1427 /*
1428 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1429 */
1430 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1431 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1432 strb r2, [r0]
1433 mov r3, r1, lsr #24
1434 strb r3, [r0, #0x05]
1435 mov r3, r1, lsr #8 /* r3 = .543 */
1436 strh r3, [r0, #0x03]
1437 mov r3, r2, lsr #8 /* r3 = ...1 */
1438 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
1439 strh r3, [r0, #0x01]
1440 RET
1441 LMEMCPY_6_PAD
1442
1443 /*
1444 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1445 */
1446 ldrb r2, [r1]
1447 ldrh r3, [r1, #0x01]
1448 ldrh ip, [r1, #0x03]
1449 ldrb r1, [r1, #0x05]
1450 strb r2, [r0]
1451 strh r3, [r0, #0x01]
1452 strh ip, [r0, #0x03]
1453 strb r1, [r0, #0x05]
1454 RET
1455 LMEMCPY_6_PAD
1456
1457 /*
1458 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1459 */
1460 ldrh r2, [r1, #0x04] /* r2 = ..54 */
1461 ldr r3, [r1] /* r3 = 3210 */
1462 mov r2, r2, lsl #16 /* r2 = 54.. */
1463 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
1464 strh r3, [r0]
1465 str r2, [r0, #0x02]
1466 RET
1467 LMEMCPY_6_PAD
1468
1469 /*
1470 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1471 */
1472 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1473 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
1474 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1475 mov r2, r2, lsl #8 /* r2 = 543. */
1476 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
1477 strh r1, [r0]
1478 str r2, [r0, #0x02]
1479 RET
1480 LMEMCPY_6_PAD
1481
1482 /*
1483 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1484 */
1485 ldrh r2, [r1]
1486 ldr r3, [r1, #0x02]
1487 strh r2, [r0]
1488 str r3, [r0, #0x02]
1489 RET
1490 LMEMCPY_6_PAD
1491
1492 /*
1493 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1494 */
1495 ldrb r3, [r1] /* r3 = ...0 */
1496 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1497 ldrb r1, [r1, #0x05] /* r1 = ...5 */
1498 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1499 mov r1, r1, lsl #24 /* r1 = 5... */
1500 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
1501 strh r3, [r0]
1502 str r1, [r0, #0x02]
1503 RET
1504 LMEMCPY_6_PAD
1505
1506 /*
1507 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1508 */
1509 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1510 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
1511 strb r2, [r0]
1512 mov r2, r2, lsr #8 /* r2 = .321 */
1513 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
1514 mov r1, r1, lsr #8 /* r1 = ...5 */
1515 str r2, [r0, #0x01]
1516 strb r1, [r0, #0x05]
1517 RET
1518 LMEMCPY_6_PAD
1519
1520 /*
1521 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1522 */
1523 ldrb r2, [r1]
1524 ldrh r3, [r1, #0x01]
1525 ldrh ip, [r1, #0x03]
1526 ldrb r1, [r1, #0x05]
1527 strb r2, [r0]
1528 strh r3, [r0, #0x01]
1529 strh ip, [r0, #0x03]
1530 strb r1, [r0, #0x05]
1531 RET
1532 LMEMCPY_6_PAD
1533
1534 /*
1535 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1536 */
1537 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1538 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1539 strb r2, [r0]
1540 mov r2, r2, lsr #8 /* r2 = ...1 */
1541 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
1542 mov r1, r1, lsr #24 /* r1 = ...5 */
1543 str r2, [r0, #0x01]
1544 strb r1, [r0, #0x05]
1545 RET
1546 LMEMCPY_6_PAD
1547
1548 /*
1549 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1550 */
1551 ldrb r2, [r1]
1552 ldr r3, [r1, #0x01]
1553 ldrb r1, [r1, #0x05]
1554 strb r2, [r0]
1555 str r3, [r0, #0x01]
1556 strb r1, [r0, #0x05]
1557 RET
1558 LMEMCPY_6_PAD
1559
1560
1561 /******************************************************************************
1562 * Special case for 8 byte copies
1563 */
1564 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
1565 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
1566 LMEMCPY_8_PAD
1567 .Lmemcpy_8:
/*
 * Dispatch on r2 = ((dst & 3) << 2) | (src & 3); same mechanism as
 * .Lmemcpy_4: pc reads as instruction address + 8, so the sub yields
 * the address of .Lmemcpy_8 and case N lives at .Lmemcpy_8 + N * 64.
 * Case 0000 falls through.  Bytes of the 8-byte datum are numbered
 * 0-7 in the diagram comments, 0 = lowest-addressed.
 */
1568 and r2, r1, #0x03
1569 orr r2, r2, r0, lsl #2
1570 ands r2, r2, #0x0f
1571 sub r3, pc, #0x14
1572 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
1573
1574 /*
1575 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1576 */
1577 ldr r2, [r1]
1578 ldr r3, [r1, #0x04]
1579 str r2, [r0]
1580 str r3, [r0, #0x04]
1581 RET
1582 LMEMCPY_8_PAD
1583
1584 /*
1585 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1586 */
1587 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1588 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
1589 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1590 mov r3, r3, lsr #8 /* r3 = .210 */
1591 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1592 mov r1, r1, lsl #24 /* r1 = 7... */
1593 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
1594 str r3, [r0]
1595 str r2, [r0, #0x04]
1596 RET
1597 LMEMCPY_8_PAD
1598
1599 /*
1600 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1601 */
1602 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1603 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1604 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1605 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1606 mov r3, r3, lsr #16 /* r3 = ..54 */
1607 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
1608 str r2, [r0]
1609 str r3, [r0, #0x04]
1610 RET
1611 LMEMCPY_8_PAD
1612
1613 /*
1614 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1615 */
1616 ldrb r3, [r1] /* r3 = ...0 */
1617 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1618 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
1619 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1620 mov r2, r2, lsr #24 /* r2 = ...4 */
1621 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
1622 str r3, [r0]
1623 str r2, [r0, #0x04]
1624 RET
1625 LMEMCPY_8_PAD
1626
1627 /*
1628 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1629 */
1630 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1631 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
1632 strb r3, [r0]
1633 mov r1, r2, lsr #24 /* r1 = ...7 */
1634 strb r1, [r0, #0x07]
1635 mov r1, r3, lsr #8 /* r1 = .321 */
1636 mov r3, r3, lsr #24 /* r3 = ...3 */
1637 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
1638 strh r1, [r0, #0x01]
1639 str r3, [r0, #0x03]
1640 RET
1641 LMEMCPY_8_PAD
1642
1643 /*
1644 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1645 */
1646 ldrb r2, [r1]
1647 ldrh r3, [r1, #0x01]
1648 ldr ip, [r1, #0x03]
1649 ldrb r1, [r1, #0x07]
1650 strb r2, [r0]
1651 strh r3, [r0, #0x01]
1652 str ip, [r0, #0x03]
1653 strb r1, [r0, #0x07]
1654 RET
1655 LMEMCPY_8_PAD
1656
1657 /*
1658 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1659 */
1660 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1661 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1662 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1663 strb r2, [r0] /* 0 */
1664 mov ip, r1, lsr #8 /* ip = ...7 */
1665 strb ip, [r0, #0x07] /* 7 */
1666 mov ip, r2, lsr #8 /* ip = ...1 */
1667 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1668 mov r3, r3, lsr #8 /* r3 = .543 */
1669 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
1670 strh ip, [r0, #0x01]
1671 str r3, [r0, #0x03]
1672 RET
1673 LMEMCPY_8_PAD
1674
1675 /*
1676 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1677 */
1678 ldrb r3, [r1] /* r3 = ...0 */
1679 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1680 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
1681 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1682 strb r3, [r0]
1683 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
1684 strh ip, [r0, #0x01]
1685 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
1686 str r2, [r0, #0x03]
1687 strb r1, [r0, #0x07]
1688 RET
1689 LMEMCPY_8_PAD
1690
1691 /*
1692 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1693 */
1694 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1695 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1696 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1697 strh r2, [r0]
1698 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
1699 mov r3, r3, lsr #16 /* r3 = ..76 */
1700 str r2, [r0, #0x02]
1701 strh r3, [r0, #0x06]
1702 RET
1703 LMEMCPY_8_PAD
1704
1705 /*
1706 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1707 */
1708 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1709 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1710 ldrb ip, [r1, #0x07] /* ip = ...7 */
1711 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1712 strh r1, [r0]
1713 mov r1, r2, lsr #24 /* r1 = ...2 */
1714 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
1715 mov r3, r3, lsr #24 /* r3 = ...6 */
1716 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
1717 str r1, [r0, #0x02]
1718 strh r3, [r0, #0x06]
1719 RET
1720 LMEMCPY_8_PAD
1721
1722 /*
1723 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1724 */
1725 ldrh r2, [r1]
1726 ldr ip, [r1, #0x02]
1727 ldrh r3, [r1, #0x06]
1728 strh r2, [r0]
1729 str ip, [r0, #0x02]
1730 strh r3, [r0, #0x06]
1731 RET
1732 LMEMCPY_8_PAD
1733
1734 /*
1735 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1736 */
1737 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
1738 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1739 ldrb ip, [r1] /* ip = ...0 */
1740 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
1741 strh r1, [r0, #0x06]
1742 mov r3, r3, lsl #24 /* r3 = 5... */
1743 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
1744 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
1745 str r3, [r0, #0x02]
1746 strh r2, [r0]
1747 RET
1748 LMEMCPY_8_PAD
1749
1750 /*
1751 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1752 */
1753 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1754 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1755 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
1756 strh r1, [r0, #0x05]
1757 strb r2, [r0]
1758 mov r1, r3, lsr #24 /* r1 = ...7 */
1759 strb r1, [r0, #0x07]
1760 mov r2, r2, lsr #8 /* r2 = .321 */
1761 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
1762 str r2, [r0, #0x01]
1763 RET
1764 LMEMCPY_8_PAD
1765
1766 /*
1767 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1768 */
1769 ldrb r3, [r1] /* r3 = ...0 */
1770 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
1771 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1772 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1773 strb r3, [r0]
1774 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
1775 strh r3, [r0, #0x05]
1776 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
1777 str r2, [r0, #0x01]
1778 strb r1, [r0, #0x07]
1779 RET
1780 LMEMCPY_8_PAD
1781
1782 /*
1783 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1784 */
1785 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1786 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1787 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1788 strb r2, [r0]
1789 mov ip, r2, lsr #8 /* ip = ...1 */
1790 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1791 mov r2, r1, lsr #8 /* r2 = ...7 */
1792 strb r2, [r0, #0x07]
1793 mov r1, r1, lsl #8 /* r1 = .76. */
1794 orr r1, r1, r3, lsr #24 /* r1 = .765 */
1795 str ip, [r0, #0x01]
1796 strh r1, [r0, #0x05]
1797 RET
1798 LMEMCPY_8_PAD
1799
1800 /*
1801 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1802 */
1803 ldrb r2, [r1]
1804 ldr ip, [r1, #0x01]
1805 ldrh r3, [r1, #0x05]
1806 ldrb r1, [r1, #0x07]
1807 strb r2, [r0]
1808 str ip, [r0, #0x01]
1809 strh r3, [r0, #0x05]
1810 strb r1, [r0, #0x07]
1811 RET
1812 LMEMCPY_8_PAD
1813
1814 /******************************************************************************
1815 * Special case for 12 byte copies
1816 */
1817 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
1818 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
1819 LMEMCPY_C_PAD
1820 .Lmemcpy_c:
/*
 * Dispatch on r2 = ((dst & 3) << 2) | (src & 3); same mechanism as
 * .Lmemcpy_4, but with 128-byte case slots since the 12-byte cases
 * need more instructions.  pc reads as instruction address + 8, so
 * the sub yields the address of .Lmemcpy_c and case N lives at
 * .Lmemcpy_c + N * 128.  Case 0000 falls through.  Bytes of the
 * 12-byte datum are numbered 0-B (hex) in the diagram comments,
 * 0 = lowest-addressed.
 */
1821 and r2, r1, #0x03
1822 orr r2, r2, r0, lsl #2
1823 ands r2, r2, #0x0f
1824 sub r3, pc, #0x14
1825 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
1826
1827 /*
1828 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1829 */
1830 ldr r2, [r1]
1831 ldr r3, [r1, #0x04]
1832 ldr r1, [r1, #0x08]
1833 str r2, [r0]
1834 str r3, [r0, #0x04]
1835 str r1, [r0, #0x08]
1836 RET
1837 LMEMCPY_C_PAD
1838
1839 /*
1840 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1841 */
1842 ldrb r2, [r1, #0xb] /* r2 = ...B */
1843 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1844 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1845 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1846 mov r2, r2, lsl #24 /* r2 = B... */
1847 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
1848 str r2, [r0, #0x08]
1849 mov r2, ip, lsl #24 /* r2 = 7... */
1850 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
1851 mov r1, r1, lsr #8 /* r1 = .210 */
1852 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
1853 str r2, [r0, #0x04]
1854 str r1, [r0]
1855 RET
1856 LMEMCPY_C_PAD
1857
1858 /*
1859 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1860 */
1861 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1862 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1863 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1864 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1865 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1866 str r2, [r0]
1867 mov r3, r3, lsr #16 /* r3 = ..54 */
1868 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
1869 mov r1, r1, lsl #16 /* r1 = BA.. */
1870 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
1871 str r3, [r0, #0x04]
1872 str r1, [r0, #0x08]
1873 RET
1874 LMEMCPY_C_PAD
1875
1876 /*
1877 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1878 */
1879 ldrb r2, [r1] /* r2 = ...0 */
1880 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1881 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1882 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1883 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1884 str r2, [r0]
1885 mov r3, r3, lsr #24 /* r3 = ...4 */
1886 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
1887 mov r1, r1, lsl #8 /* r1 = BA9. */
1888 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
1889 str r3, [r0, #0x04]
1890 str r1, [r0, #0x08]
1891 RET
1892 LMEMCPY_C_PAD
1893
1894 /*
1895 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1896 */
1897 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1898 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1899 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
1900 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1901 strh r1, [r0, #0x01]
1902 strb r2, [r0]
1903 mov r1, r2, lsr #24 /* r1 = ...3 */
1904 orr r2, r1, r3, lsl #8 /* r2 = 6543 */
1905 mov r1, r3, lsr #24 /* r1 = ...7 */
1906 orr r1, r1, ip, lsl #8 /* r1 = A987 */
1907 mov ip, ip, lsr #24 /* ip = ...B */
1908 str r2, [r0, #0x03]
1909 str r1, [r0, #0x07]
1910 strb ip, [r0, #0x0b]
1911 RET
1912 LMEMCPY_C_PAD
1913
1914 /*
1915 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1916 */
1917 ldrb r2, [r1]
1918 ldrh r3, [r1, #0x01]
1919 ldr ip, [r1, #0x03]
1920 strb r2, [r0]
1921 ldr r2, [r1, #0x07]
1922 ldrb r1, [r1, #0x0b]
1923 strh r3, [r0, #0x01]
1924 str ip, [r0, #0x03]
1925 str r2, [r0, #0x07]
1926 strb r1, [r0, #0x0b]
1927 RET
1928 LMEMCPY_C_PAD
1929
1930 /*
1931 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1932 */
1933 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1934 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1935 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1936 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1937 strb r2, [r0]
1938 mov r2, r2, lsr #8 /* r2 = ...1 */
1939 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1940 strh r2, [r0, #0x01]
1941 mov r2, r3, lsr #8 /* r2 = .543 */
1942 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
1943 mov r2, ip, lsr #8 /* r2 = .987 */
1944 orr r2, r2, r1, lsl #24 /* r2 = A987 */
1945 mov r1, r1, lsr #8 /* r1 = ...B */
1946 str r3, [r0, #0x03]
1947 str r2, [r0, #0x07]
1948 strb r1, [r0, #0x0b]
1949 RET
1950 LMEMCPY_C_PAD
1951
1952 /*
1953 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1954 */
1955 ldrb r2, [r1]
1956 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1957 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1958 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1959 strb r2, [r0]
1960 strh r3, [r0, #0x01]
1961 mov r3, r3, lsr #16 /* r3 = ..43 */
1962 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
1963 mov ip, ip, lsr #16 /* ip = ..87 */
1964 orr ip, ip, r1, lsl #16 /* ip = A987 */
1965 mov r1, r1, lsr #16 /* r1 = ..xB */
1966 str r3, [r0, #0x03]
1967 str ip, [r0, #0x07]
1968 strb r1, [r0, #0x0b]
1969 RET
1970 LMEMCPY_C_PAD
1971
1972 /*
1973 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1974 */
1975 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
1976 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1977 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
1978 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1979 strh ip, [r0]
1980 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
1981 mov r3, r3, lsr #16 /* r3 = ..76 */
1982 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
1983 mov r2, r2, lsr #16 /* r2 = ..BA */
1984 str r1, [r0, #0x02]
1985 str r3, [r0, #0x06]
1986 strh r2, [r0, #0x0a]
1987 RET
1988 LMEMCPY_C_PAD
1989
1990 /*
1991 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1992 */
1993 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1994 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1995 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
1996 strh ip, [r0]
1997 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1998 ldrb r1, [r1, #0x0b] /* r1 = ...B */
1999 mov r2, r2, lsr #24 /* r2 = ...2 */
2000 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2001 mov r3, r3, lsr #24 /* r3 = ...6 */
2002 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2003 mov r1, r1, lsl #8 /* r1 = ..B. */
2004 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2005 str r2, [r0, #0x02]
2006 str r3, [r0, #0x06]
2007 strh r1, [r0, #0x0a]
2008 RET
2009 LMEMCPY_C_PAD
2010
2011 /*
2012 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2013 */
2014 ldrh r2, [r1]
2015 ldr r3, [r1, #0x02]
2016 ldr ip, [r1, #0x06]
2017 ldrh r1, [r1, #0x0a]
2018 strh r2, [r0]
2019 str r3, [r0, #0x02]
2020 str ip, [r0, #0x06]
2021 strh r1, [r0, #0x0a]
2022 RET
2023 LMEMCPY_C_PAD
2024
2025 /*
2026 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2027 */
2028 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2029 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2030 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2031 strh ip, [r0, #0x0a]
2032 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2033 ldrb r1, [r1] /* r1 = ...0 */
2034 mov r2, r2, lsl #24 /* r2 = 9... */
2035 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2036 mov r3, r3, lsl #24 /* r3 = 5... */
2037 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2038 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
2039 str r2, [r0, #0x06]
2040 str r3, [r0, #0x02]
2041 strh r1, [r0]
2042 RET
2043 LMEMCPY_C_PAD
2044
2045 /*
2046 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2047 */
2048 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2049 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2050 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2051 strb r2, [r0]
2052 mov r3, r2, lsr #8 /* r3 = .321 */
2053 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2054 str r3, [r0, #0x01]
2055 mov r3, ip, lsr #8 /* r3 = .765 */
2056 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2057 str r3, [r0, #0x05]
2058 mov r1, r1, lsr #8 /* r1 = .BA9 */
2059 strh r1, [r0, #0x09]
2060 mov r1, r1, lsr #16 /* r1 = ...B */
2061 strb r1, [r0, #0x0b]
2062 RET
2063 LMEMCPY_C_PAD
2064
2065 /*
2066 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2067 */
2068 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2069 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2070 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2071 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2072 strb r2, [r0, #0x0b]
2073 mov r2, r3, lsr #16 /* r2 = ..A9 */
2074 strh r2, [r0, #0x09]
2075 mov r3, r3, lsl #16 /* r3 = 87.. */
2076 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2077 mov ip, ip, lsl #16 /* ip = 43.. */
2078 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2079 mov r1, r1, lsr #8 /* r1 = .210 */
2080 str r3, [r0, #0x05]
2081 str ip, [r0, #0x01]
2082 strb r1, [r0]
2083 RET
2084 LMEMCPY_C_PAD
2085
2086 /*
2087 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2088 */
2089 ldrh r2, [r1] /* r2 = ..10 */
2090 ldr r3, [r1, #0x02] /* r3 = 5432 */
2091 ldr ip, [r1, #0x06] /* ip = 9876 */
2092 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2093 strb r2, [r0]
2094 mov r2, r2, lsr #8 /* r2 = ...1 */
2095 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2096 mov r3, r3, lsr #24 /* r3 = ...5 */
2097 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2098 mov ip, ip, lsr #24 /* ip = ...9 */
2099 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2100 mov r1, r1, lsr #8 /* r1 = ...B */
2101 str r2, [r0, #0x01]
2102 str r3, [r0, #0x05]
2103 strh ip, [r0, #0x09]
2104 strb r1, [r0, #0x0b]
2105 RET
2106 LMEMCPY_C_PAD
2107
2108 /*
2109 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2110 */
2111 ldrb r2, [r1]
2112 ldr r3, [r1, #0x01]
2113 ldr ip, [r1, #0x05]
2114 strb r2, [r0]
2115 ldrh r2, [r1, #0x09]
2116 ldrb r1, [r1, #0x0b]
2117 str r3, [r0, #0x01]
2118 str ip, [r0, #0x05]
2119 strh r2, [r0, #0x09]
2120 strb r1, [r0, #0x0b]
2121 RET
2122 END(memcpy)
Cache object: 8b03d036167bbfec4105c97f415d4e04
|