/*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#include <machine/acle-compat.h>

	.syntax	unified
	.text
	.align	2

/*
 * GET_PCB(tmp)
 *
 * Leaves in `tmp' the address of a word which the callers in this file
 * immediately dereference (`ldr tmp, [tmp]') and then use as the base
 * register for their PCB_ONFAULT loads and stores.
 *
 * __ARM_ARCH >= 6:	reads CP15 register c13, c0, 4 and adds TD_PCB to it
 *			(presumably the current thread pointer -- the
 *			register's contents are set up outside this file).
 * Older cores:		loads the constant (__pcpu + PC_CURPCB) from the
 *			literal word .Lcurpcb emitted just below.
 *
 * Clobbers only `tmp'; condition flags are left untouched.
 */
#if __ARM_ARCH >= 6
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)
#else
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB
#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#endif

/*
 * copyin
 *
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space
 *
 * Returns in r0: 0 on success, EFAULT when the copy is abandoned through
 * .Lcopyin_fault.
 *
 * Register protocol while the generic path (.Lnormal) is active:
 *	r10 = base register for the PCB_ONFAULT accesses (from GET_PCB)
 *	r11 = PCB_ONFAULT value found on entry; written back on every exit
 *	r3  = tells .Lcopyin_fault what .Lcopyin_guts currently has pushed:
 *	      0 -> nothing, > 0 -> r4-r7, < 0 -> r4-r9
 */
ENTRY(copyin)
	/*
	 * NOTE(review): this is a signed test, so a length with bit 31 set
	 * also takes the early "success" return without copying anything.
	 */
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional copy hook.  .L_arm_memcpy and .L_min_memcpy_size are
	 * literal words defined outside this file; each holds the address
	 * of a variable.  The hook is used only when the function pointer
	 * is non-NULL and the length is at least the stored minimum
	 * (signed compare).
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	/* Swap into hook order: r0 = kernel dst, r1 = user src, r2 = len */
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2			/* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* pc reads as . + 8: the cmp below */
	ldr	pc, [r4]		/* Indirect call through the pointer */
	cmp	r0, #0			/* Hook returned 0? (ldmfd keeps flags) */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq
	/* Non-zero result: fall through and redo the copy generically */

.Lnormal:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]		/* Install ours */
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

/*
 * Address installed in PCB_ONFAULT for the duration of the copy --
 * presumably resumed here by the abort handler (not visible in this file)
 * when a user access faults.  Uses r3 to pop whatever .Lcopyin_guts had
 * saved, then returns EFAULT to copyin's caller.
 */
.Lcopyin_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

/*
 * .Lcopyin_guts -- body of copyin, entered with `bl' from .Lnormal.
 *
 * In:	r0 = user source, r1 = kernel destination, r2 = length (> 0),
 *	r3 = 0, lr = return address.
 * Every user-side load is a "T" form (ldrt/ldrbt), i.e. an unprivileged
 * access; kernel-side stores are ordinary str/strb/strd.
 * r3 is kept in step with the stack contents for .Lcopyin_fault:
 * -1 while r4-r9 are pushed, 0 otherwise in this part of the code.
 * Clobbers r0-r3, ip and the flags; r4-r9 are saved and restored.
 */
.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1..3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump: ip = 3 - bytes_needed = number of ldrbt/strb pairs
	 * to skip.  pc reads as the address of the first ldrbt (addne + 8)
	 * and each pair is 8 bytes.  When ip == 0 the addne is not executed
	 * and all three pairs run after the nop.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/* Quad-align the destination buffer */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrtne	ip, [r0], #0x04
	strne	ip, [r1], #0x04
	subne	r2, r2, #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * Computed jump into the three 8-byte groups below.
	 *   r4 = bytes to move in whole 8-byte units (0, 8, 16 or 24)
	 *   r5 = 0x18 - r4 = bytes covered by the groups to skip
	 * Each group is exactly four instructions (16 bytes of code) per
	 * 8 bytes of data, hence the `lsl #1'; the nop inside each group
	 * keeps it at four instructions.  pc reads as the first group
	 * (add + 8).
	 * The flags set by `subs' (Z = no residue) survive the groups and
	 * the ldmfd and are consumed by the RETeq that follows them.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 no longer on the stack */

	/* Here r2 is 1..7 and both buffers are word aligned */
.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrtge	ip, [r0], #0x04		/* >= 4 left: move one word */
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* < 4 left: undo the subtract */
	/* 1..3 bytes remain */
	ldrbt	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbtge	r2, [r0], #0x01		/* 2nd byte; r2 reused as data */
	strb	ip, [r1], #0x01
	ldrbtgt	ip, [r0]		/* 3rd byte */
	strbge	r2, [r1], #0x01
	strbgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * On entry ip = r0 & 3 (1, 2 or 3), left by the `ands' at
 * .Lcopyin_wordaligned.  r0 is rounded down to a word boundary and the
 * copy proceeds with aligned word loads: ip always holds the most recently
 * loaded source word, and each destination word is assembled from the
 * unused bytes of ip plus the leading bytes of the next source word.
 * The shift amounts are 8/24, 16/16 or 24/8 for a misalignment of 1, 2
 * or 3, with shift directions selected by __ARMEB__.
 *
 * r4-r7 are pushed for the duration and r3 is set to 1 so that
 * .Lcopyin_fault pops the same registers.
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2			/* Flags survive the ldrt below */
	ldrt	ip, [r0], #0x04
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

	/* Source misaligned by 1: 16 bytes per iteration */
.Lcopyin_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back up over the 3 unused bytes in ip */
	blt	.Lcopyin_l4

	/* Source misaligned by 1: one word per iteration */
.Lcopyin_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03		/* Back up over the 3 unused bytes in ip */
	b	.Lcopyin_l4

	/* Source misaligned by 2: 16 bytes per iteration */
.Lcopyin_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back up over the 2 unused bytes in ip */
	blt	.Lcopyin_l4

	/* Source misaligned by 2: one word per iteration */
.Lcopyin_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02		/* Back up over the 2 unused bytes in ip */
	b	.Lcopyin_l4

	/* Source misaligned by 3: 16 bytes per iteration */
.Lcopyin_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back up over the 1 unused byte in ip */
	blt	.Lcopyin_l4

	/* Source misaligned by 3: one word per iteration */
.Lcopyin_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01		/* Back up over the 1 unused byte in ip */

	/*
	 * Common tail of the misaligned paths.  r0 is a true byte address
	 * again and r2 is (bytes remaining - 4).
	 */
.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 no longer on the stack */
	adds	r2, r2, #0x04
	RETeq
	/*
	 * Copy the last r2 (1..3) bytes.  Also entered directly from
	 * .Lcopyin_guts when the whole request is shorter than the bytes
	 * needed to word-align the destination.
	 * Computed jump: r2 = 3 - count = number of 8-byte ldrbt/strb pairs
	 * to skip; with count == 3 the addne is not executed.
	 */
.Lcopyin_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET
END(copyin)

/*
 * copyout
 *
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space
 *
 * Returns in r0: 0 on success, EFAULT when the copy is abandoned through
 * .Lcopyout_fault.
 *
 * Register protocol while the generic path (.Lnormale) is active:
 *	r10 = base register for the PCB_ONFAULT accesses (from GET_PCB)
 *	r11 = PCB_ONFAULT value found on entry; written back on every exit
 *	r3  = tells .Lcopyout_fault what .Lcopyout_guts currently has pushed:
 *	      0 -> nothing, > 0 -> r4-r7, < 0 -> r4-r9
 */
ENTRY(copyout)
	/*
	 * NOTE(review): this is a signed test, so a length with bit 31 set
	 * also takes the early "success" return without copying anything.
	 */
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional copy hook.  .L_arm_memcpy and .L_min_memcpy_size are
	 * literal words defined outside this file; each holds the address
	 * of a variable.  The hook is used only when the function pointer
	 * is non-NULL and the length is at least the stored minimum
	 * (signed compare).
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	stmfd	sp!, {r0-r2, r4, lr}
	/* Swap into hook order: r0 = user dst, r1 = kernel src, r2 = len */
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1			/* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* pc reads as . + 8: the cmp below */
	ldr	pc, [r4]		/* Indirect call through the pointer */
	cmp	r0, #0			/* Hook returned 0? (ldmfd keeps flags) */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq
	/* Non-zero result: fall through and redo the copy generically */

.Lnormale:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]		/* Install ours */
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

/*
 * Address installed in PCB_ONFAULT for the duration of the copy --
 * presumably resumed here by the abort handler (not visible in this file)
 * when a user access faults.  Uses r3 to pop whatever .Lcopyout_guts had
 * saved, then returns EFAULT to copyout's caller.
 */
.Lcopyout_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

/*
 * .Lcopyout_guts -- body of copyout, entered with `bl' from .Lnormale.
 *
 * In:	r0 = kernel source, r1 = user destination, r2 = length (> 0),
 *	r3 = 0, lr = return address.
 * Every user-side store is a "T" form (strt/strbt), i.e. an unprivileged
 * access; kernel-side loads are ordinary ldr/ldrb/ldrd.
 * r3 is kept in step with the stack contents for .Lcopyout_fault:
 * -1 while r4-r9 are pushed, 0 otherwise in this part of the code.
 * Clobbers r0-r3, ip and the flags; r4-r9 are saved and restored.
 */
.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1..3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump: ip = 3 - bytes_needed = number of ldrb/strbt pairs
	 * to skip.  pc reads as the address of the first ldrb (addne + 8)
	 * and each pair is 8 bytes.  When ip == 0 the addne is not executed
	 * and all three pairs run after the nop.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/* Quad-align the source buffer (the ldrd loads below read from r0) */
	tst	r0, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	subne	r2, r2, #0x04
	strtne	ip, [r1], #0x04

	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer word aligned, source is quad aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldrd	r4, [r0], #0x08		/* LD:00-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrd	r6, [r0], #0x08		/* LD:08-0f */
	ldrd	r8, [r0], #0x08		/* LD:10-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldrd	r4, [r0], #0x08		/* LD:18-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldrd	r6, [r0], #0x08		/* LD:20-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldrd	r8, [r0], #0x08		/* LD:28-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldrd	r4, [r0], #0x08		/* LD:30-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldrd	r6, [r0], #0x08		/* LD:38-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldrd	r8, [r0], #0x08		/* LD:40-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldrd	r4, [r0], #0x08		/* LD:48-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldrd	r6, [r0], #0x08		/* LD:50-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldrd	r8, [r0], #0x08		/* LD:58-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldrd	r4, [r0], #0x08		/* LD:60-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldrd	r6, [r0], #0x08		/* LD:68-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldrd	r8, [r0], #0x08		/* LD:70-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldrd	r4, [r0], #0x08		/* LD:78-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldrd	r4, [r0], #0x08
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	ldrd	r8, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldrd	r4, [r0], #0x08
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * Computed jump into the three 8-byte groups below.
	 *   r4 = bytes to move in whole 8-byte units (0, 8, 16 or 24)
	 *   r5 = 0x18 - r4 = bytes covered by the groups to skip
	 * Each group is exactly four instructions (16 bytes of code) per
	 * 8 bytes of data, hence the `lsl #1'; the trailing nop in each
	 * group keeps it at four instructions.  pc reads as the first group
	 * (add + 8).
	 * The flags set by `subs' (Z = no residue) survive the groups and
	 * the ldmfd and are consumed by the RETeq that follows them.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 16 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 8 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 no longer on the stack */

	/* Here r2 is 1..7 and both buffers are word aligned */
.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04		/* >= 4 left: move one word */
	strtge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* < 4 left: undo the subtract */
	/* 1..3 bytes remain */
	ldrb	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbge	r2, [r0], #0x01		/* 2nd byte; r2 reused as data */
	strbt	ip, [r1], #0x01
	ldrbgt	ip, [r0]		/* 3rd byte */
	strbtge	r2, [r1], #0x01
	strbtgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * On entry ip = r0 & 3 (1, 2 or 3), left by the `ands' at
 * .Lcopyout_wordaligned.  r0 is rounded down to a word boundary and the
 * copy proceeds with aligned word loads: ip always holds the most recently
 * loaded source word, and each destination word is assembled from the
 * unused bytes of ip plus the leading bytes of the next source word.
 * The shift amounts are 8/24, 16/16 or 24/8 for a misalignment of 1, 2
 * or 3, with shift directions selected by __ARMEB__.
 *
 * r4-r7 are pushed for the duration and r3 is set to 1 so that
 * .Lcopyout_fault pops the same registers.
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2			/* Flags survive the ldr below */
	ldr	ip, [r0], #0x04
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

	/* Source misaligned by 1: 16 bytes per iteration */
.Lcopyout_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back up over the 3 unused bytes in ip */
	blt	.Lcopyout_l4

	/* Source misaligned by 1: one word per iteration */
.Lcopyout_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03		/* Back up over the 3 unused bytes in ip */
	b	.Lcopyout_l4

	/* Source misaligned by 2: 16 bytes per iteration */
.Lcopyout_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back up over the 2 unused bytes in ip */
	blt	.Lcopyout_l4

	/* Source misaligned by 2: one word per iteration */
.Lcopyout_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02		/* Back up over the 2 unused bytes in ip */
	b	.Lcopyout_l4

	/* Source misaligned by 3: 16 bytes per iteration */
.Lcopyout_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back up over the 1 unused byte in ip */
	blt	.Lcopyout_l4

	/* Source misaligned by 3: one word per iteration */
.Lcopyout_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01		/* Back up over the 1 unused byte in ip */

	/*
	 * Common tail of the misaligned paths.  r0 is a true byte address
	 * again and r2 is (bytes remaining - 4).
	 */
.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 no longer on the stack */
	adds	r2, r2, #0x04
	RETeq
	/*
	 * Copy the last r2 (1..3) bytes.  Also entered directly from
	 * .Lcopyout_guts when the whole request is shorter than the bytes
	 * needed to word-align the destination.
	 * Computed jump: r2 = 3 - count = number of 8-byte ldrb/strbt pairs
	 * to skip; with count == 3 the addne is not executed.
	 */
.Lcopyout_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET
END(copyout)

/* Cache object: 9fc86faa4f98a830330e1fb7bce7a071 */