1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
2
3 /*-
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD: releng/10.2/sys/arm/arm/bcopyinout_xscale.S 278652 2015-02-13 00:49:47Z ian $");
40
41 .syntax unified
42 .text
43 .align 2
44
45 #ifdef _ARM_ARCH_6 /* ARMv6 variant: tmp = (value read from CP15 c13, c0, 4) + TD_PCB */
46 #define GET_PCB(tmp) \
47 mrc p15, 0, tmp, c13, c0, 4; \
48 add tmp, tmp, #(TD_PCB)
49 #else /* Other variant: tmp = __pcpu + PC_CURPCB, loaded from the literal below */
50 .Lcurpcb: /* Literal word holding the address __pcpu + PC_CURPCB */
51 .word _C_LABEL(__pcpu) + PC_CURPCB
52 #define GET_PCB(tmp) \
53 ldr tmp, .Lcurpcb
54 #endif /* _ARM_ARCH_6; in both variants the users below follow GET_PCB with "ldr r10, [r10]" */
55
56 /*
57 * r0 = user space address (source)
58 * r1 = kernel space address (destination)
59 * r2 = length in bytes (compared as signed; <= 0 returns 0 without copying)
60 *
61 * Copies bytes from user space to kernel space; r0 = 0 on return, or EFAULT if .Lcopyin_fault runs
62 */
63 ENTRY(copyin)
64 cmp r2, #0x00
65 movle r0, #0x00
66 movle pc, lr /* Bail early if length is <= 0 */
67
68 ldr r3, .L_arm_memcpy /* r3 = address of the hook pointer (label is defined outside this file section) */
69 ldr r3, [r3] /* r3 = hook function pointer */
70 cmp r3, #0
71 beq .Lnormal /* No hook installed: use the code below */
72 ldr r3, .L_min_memcpy_size
73 ldr r3, [r3] /* r3 = smallest length that is handed to the hook */
74 cmp r2, r3
75 blt .Lnormal /* Shorter than that: use the code below */
76 stmfd sp!, {r0-r2, r4, lr} /* Keep the original arguments in case the hook fails */
77 mov r3, r0
78 mov r0, r1 /* Hook is called with r0 = kernel address ... */
79 mov r1, r3 /* ... and r1 = user address */
80 mov r3, #2 /* SRC_IS_USER */
81 ldr r4, .L_arm_memcpy
82 mov lr, pc /* pc reads as this insn + 8, i.e. the cmp after the ldr pc */
83 ldr pc, [r4] /* Indirect call through the hook pointer */
84 cmp r0, #0 /* Hook result 0 is treated as success */
85 ldmfd sp!, {r0-r2, r4, lr} /* Restores the arguments; flags are unchanged */
86 moveq r0, #0
87 RETeq
88
89 .Lnormal: /* Also reached by fall-through, arguments restored, when the hook returned non-zero */
90 stmfd sp!, {r10-r11, lr}
91
92 GET_PCB(r10)
93 ldr r10, [r10] /* r10 = base used for the PCB_ONFAULT accesses below */
94
95 mov r3, #0x00 /* r3 = 0: no r4-r9 / r4-r7 saved on the stack yet */
96 adr ip, .Lcopyin_fault
97 ldr r11, [r10, #PCB_ONFAULT] /* r11 = previous PCB_ONFAULT value */
98 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyin_fault for the duration of the copy */
99 bl .Lcopyin_guts
100 str r11, [r10, #PCB_ONFAULT] /* Put the previous value back */
101 mov r0, #0x00 /* Success */
102 ldmfd sp!, {r10-r11, pc}
103
104 .Lcopyin_fault: /* Relies on r3, r10, r11 and sp being as the copy code left them */
105 ldr r0, =EFAULT
106 str r11, [r10, #PCB_ONFAULT] /* Put the previous value back */
107 cmp r3, #0x00 /* Sign of r3 says what .Lcopyin_guts had pushed */
108 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
109 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
110 ldmfd sp!, {r10-r11, pc} /* Return EFAULT to copyin's caller */
111
112 .Lcopyin_guts: /* In: r0 = src (user), r1 = dst (kernel), r2 = length > 0, r3 = 0. User loads use ldrt/ldrbt */
113 pld [r0]
114 /* Word-align the destination buffer */
115 ands ip, r1, #0x03 /* Already word aligned? */
116 beq .Lcopyin_wordaligned /* Yup */
117 rsb ip, ip, #0x04 /* ip = bytes needed to reach word alignment (1-3) */
118 cmp r2, ip /* Enough bytes left to align it? */
119 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
120 sub r2, r2, ip
121 rsbs ip, ip, #0x03 /* ip = number of byte copies to skip (0-2) */
122 addne pc, pc, ip, lsl #3 /* Skip ip load/store pairs of 8 bytes each; pc reads as this insn + 8 */
123 nop
124 ldrbt ip, [r0], #0x01
125 strb ip, [r1], #0x01
126 ldrbt ip, [r0], #0x01
127 strb ip, [r1], #0x01
128 ldrbt ip, [r0], #0x01
129 strb ip, [r1], #0x01
130 cmp r2, #0x00 /* All done? */
131 RETeq
132
133 /* Destination buffer is now word aligned */
134 .Lcopyin_wordaligned:
135 ands ip, r0, #0x03 /* Is src also word-aligned? */
136 bne .Lcopyin_bad_align /* Nope. Things just got bad */
137 cmp r2, #0x08 /* Less than 8 bytes remaining? */
138 blt .Lcopyin_w_less_than8
139
140 /* Quad-align the destination buffer */
141 tst r1, #0x07 /* Already quad (8-byte) aligned? */
142 ldrtne ip, [r0], #0x04 /* No: copy one word so that dst becomes 8-byte aligned */
143 strne ip, [r1], #0x04
144 subne r2, r2, #0x04
145 stmfd sp!, {r4-r9} /* Free up some registers */
146 mov r3, #-1 /* Signal restore r4-r9 */
147
148 /* Destination buffer quad aligned, source is word aligned */
149 subs r2, r2, #0x80 /* r2 is kept biased by -0x80 while in the loop */
150 blt .Lcopyin_w_lessthan128
151
152 /* Copy 128 bytes at a time */
153 .Lcopyin_w_loop128:
154 ldrt r4, [r0], #0x04 /* LD:00-03 */
155 ldrt r5, [r0], #0x04 /* LD:04-07 */
156 pld [r0, #0x18] /* Prefetch 0x20 */
157 ldrt r6, [r0], #0x04 /* LD:08-0b */
158 ldrt r7, [r0], #0x04 /* LD:0c-0f */
159 ldrt r8, [r0], #0x04 /* LD:10-13 */
160 ldrt r9, [r0], #0x04 /* LD:14-17 */
161 strd r4, [r1], #0x08 /* ST:00-07 */
162 ldrt r4, [r0], #0x04 /* LD:18-1b */
163 ldrt r5, [r0], #0x04 /* LD:1c-1f */
164 strd r6, [r1], #0x08 /* ST:08-0f */
165 ldrt r6, [r0], #0x04 /* LD:20-23 */
166 ldrt r7, [r0], #0x04 /* LD:24-27 */
167 pld [r0, #0x18] /* Prefetch 0x40 */
168 strd r8, [r1], #0x08 /* ST:10-17 */
169 ldrt r8, [r0], #0x04 /* LD:28-2b */
170 ldrt r9, [r0], #0x04 /* LD:2c-2f */
171 strd r4, [r1], #0x08 /* ST:18-1f */
172 ldrt r4, [r0], #0x04 /* LD:30-33 */
173 ldrt r5, [r0], #0x04 /* LD:34-37 */
174 strd r6, [r1], #0x08 /* ST:20-27 */
175 ldrt r6, [r0], #0x04 /* LD:38-3b */
176 ldrt r7, [r0], #0x04 /* LD:3c-3f */
177 strd r8, [r1], #0x08 /* ST:28-2f */
178 ldrt r8, [r0], #0x04 /* LD:40-43 */
179 ldrt r9, [r0], #0x04 /* LD:44-47 */
180 pld [r0, #0x18] /* Prefetch 0x60 */
181 strd r4, [r1], #0x08 /* ST:30-37 */
182 ldrt r4, [r0], #0x04 /* LD:48-4b */
183 ldrt r5, [r0], #0x04 /* LD:4c-4f */
184 strd r6, [r1], #0x08 /* ST:38-3f */
185 ldrt r6, [r0], #0x04 /* LD:50-53 */
186 ldrt r7, [r0], #0x04 /* LD:54-57 */
187 strd r8, [r1], #0x08 /* ST:40-47 */
188 ldrt r8, [r0], #0x04 /* LD:58-5b */
189 ldrt r9, [r0], #0x04 /* LD:5c-5f */
190 strd r4, [r1], #0x08 /* ST:48-4f */
191 ldrt r4, [r0], #0x04 /* LD:60-63 */
192 ldrt r5, [r0], #0x04 /* LD:64-67 */
193 pld [r0, #0x18] /* Prefetch 0x80 */
194 strd r6, [r1], #0x08 /* ST:50-57 */
195 ldrt r6, [r0], #0x04 /* LD:68-6b */
196 ldrt r7, [r0], #0x04 /* LD:6c-6f */
197 strd r8, [r1], #0x08 /* ST:58-5f */
198 ldrt r8, [r0], #0x04 /* LD:70-73 */
199 ldrt r9, [r0], #0x04 /* LD:74-77 */
200 strd r4, [r1], #0x08 /* ST:60-67 */
201 ldrt r4, [r0], #0x04 /* LD:78-7b */
202 ldrt r5, [r0], #0x04 /* LD:7c-7f */
203 strd r6, [r1], #0x08 /* ST:68-6f */
204 strd r8, [r1], #0x08 /* ST:70-77 */
205 subs r2, r2, #0x80 /* Flags for the bge; the strd below leaves them alone */
206 strd r4, [r1], #0x08 /* ST:78-7f */
207 bge .Lcopyin_w_loop128
208
209 .Lcopyin_w_lessthan128:
210 adds r2, r2, #0x80 /* Adjust for extra sub */
211 ldmfdeq sp!, {r4-r9}
212 RETeq /* Return now if done */
213 subs r2, r2, #0x20 /* r2 is kept biased by -0x20 while in the loop */
214 blt .Lcopyin_w_lessthan32
215
216 /* Copy 32 bytes at a time */
217 .Lcopyin_w_loop32:
218 ldrt r4, [r0], #0x04
219 ldrt r5, [r0], #0x04
220 pld [r0, #0x18]
221 ldrt r6, [r0], #0x04
222 ldrt r7, [r0], #0x04
223 ldrt r8, [r0], #0x04
224 ldrt r9, [r0], #0x04
225 strd r4, [r1], #0x08
226 ldrt r4, [r0], #0x04
227 ldrt r5, [r0], #0x04
228 strd r6, [r1], #0x08
229 strd r8, [r1], #0x08
230 subs r2, r2, #0x20 /* Flags for the bge; the strd below leaves them alone */
231 strd r4, [r1], #0x08
232 bge .Lcopyin_w_loop32
233
234 .Lcopyin_w_lessthan32:
235 adds r2, r2, #0x20 /* Adjust for extra sub */
236 ldmfdeq sp!, {r4-r9}
237 RETeq /* Return now if done */
238
239 and r4, r2, #0x18 /* r4 = bytes to move in 8-byte units (0, 8, 16 or 24) */
240 rsb r5, r4, #0x18 /* r5 = 24 - r4 */
241 subs r2, r2, r4 /* r2 = 0-7 left afterwards; Z is used by the RETeq below */
242 add pc, pc, r5, lsl #1 /* Skip one 16-byte (4-insn) group per 8 bytes not needed */
243 nop
244
245 /* At least 24 bytes remaining */
246 ldrt r4, [r0], #0x04
247 ldrt r5, [r0], #0x04
248 nop /* Pads the group to 4 instructions for the computed jump */
249 strd r4, [r1], #0x08
250
251 /* At least 16 bytes remaining */
252 ldrt r4, [r0], #0x04
253 ldrt r5, [r0], #0x04
254 nop /* Pads the group to 4 instructions for the computed jump */
255 strd r4, [r1], #0x08
256
257 /* At least 8 bytes remaining */
258 ldrt r4, [r0], #0x04
259 ldrt r5, [r0], #0x04
260 nop /* Pads the group to 4 instructions for the computed jump */
261 strd r4, [r1], #0x08
262
263 /* Less than 8 bytes remaining */
264 ldmfd sp!, {r4-r9}
265 RETeq /* Return now if done */
266 mov r3, #0x00 /* r4-r9 are no longer on the stack */
267
268 .Lcopyin_w_less_than8: /* 1-7 bytes left; src and dst are both word aligned */
269 subs r2, r2, #0x04
270 ldrtge ip, [r0], #0x04 /* 4 or more left: copy one word */
271 strge ip, [r1], #0x04
272 RETeq /* Return now if done */
273 addlt r2, r2, #0x04 /* Fewer than 4 left: undo the sub */
274 ldrbt ip, [r0], #0x01 /* 1-3 bytes left: first byte */
275 cmp r2, #0x02
276 ldrbtge r2, [r0], #0x01 /* Second byte if 2 or 3 left (r2 now holds data) */
277 strb ip, [r1], #0x01
278 ldrbtgt ip, [r0] /* Third byte if 3 left */
279 strbge r2, [r1], #0x01
280 strbgt ip, [r1]
281 RET
282
283 /*
284 * At this point, it has not been possible to word align both buffers.
285 * The destination buffer (r1) is word aligned, but the source buffer
286 * (r0) is not. On entry ip = r0 & 3 (1-3).
287 */
288 .Lcopyin_bad_align:
289 stmfd sp!, {r4-r7}
290 mov r3, #0x01 /* Signal restore r4-r7 */
291 bic r0, r0, #0x03 /* Round src down to a word boundary */
292 cmp ip, #2
293 ldrt ip, [r0], #0x04 /* ip = word containing the first source bytes; flags unchanged */
294 bgt .Lcopyin_bad3 /* src offset 3 */
295 beq .Lcopyin_bad2 /* src offset 2 */
296 b .Lcopyin_bad1 /* src offset 1 */
297
298 .Lcopyin_bad1_loop16: /* 16 bytes per pass; ip carries the last source word loaded */
299 #ifdef __ARMEB__
300 mov r4, ip, lsl #8
301 #else
302 mov r4, ip, lsr #8
303 #endif
304 ldrt r5, [r0], #0x04
305 pld [r0, #0x018]
306 ldrt r6, [r0], #0x04
307 ldrt r7, [r0], #0x04
308 ldrt ip, [r0], #0x04
309 #ifdef __ARMEB__
310 orr r4, r4, r5, lsr #24
311 mov r5, r5, lsl #8
312 orr r5, r5, r6, lsr #24
313 mov r6, r6, lsl #8
314 orr r6, r6, r7, lsr #24
315 mov r7, r7, lsl #8
316 orr r7, r7, ip, lsr #24
317 #else
318 orr r4, r4, r5, lsl #24
319 mov r5, r5, lsr #8
320 orr r5, r5, r6, lsl #24
321 mov r6, r6, lsr #8
322 orr r6, r6, r7, lsl #24
323 mov r7, r7, lsr #8
324 orr r7, r7, ip, lsl #24
325 #endif
326 str r4, [r1], #0x04
327 str r5, [r1], #0x04
328 str r6, [r1], #0x04
329 str r7, [r1], #0x04
330 .Lcopyin_bad1: /* Loop entry point: test before the first pass */
331 subs r2, r2, #0x10
332 bge .Lcopyin_bad1_loop16
333
334 adds r2, r2, #0x10 /* Adjust for extra sub */
335 ldmfdeq sp!, {r4-r7}
336 RETeq /* Return now if done */
337 subs r2, r2, #0x04
338 sublt r0, r0, #0x03 /* Fewer than 4 left: point r0 at the next uncopied byte */
339 blt .Lcopyin_l4
340
341 .Lcopyin_bad1_loop4: /* One word per pass */
342 #ifdef __ARMEB__
343 mov r4, ip, lsl #8
344 #else
345 mov r4, ip, lsr #8
346 #endif
347 ldrt ip, [r0], #0x04
348 subs r2, r2, #0x04 /* Flags for the bge; orr/str below leave them alone */
349 #ifdef __ARMEB__
350 orr r4, r4, ip, lsr #24
351 #else
352 orr r4, r4, ip, lsl #24
353 #endif
354 str r4, [r1], #0x04
355 bge .Lcopyin_bad1_loop4
356 sub r0, r0, #0x03 /* Point r0 at the next uncopied byte */
357 b .Lcopyin_l4
358
359 .Lcopyin_bad2_loop16: /* 16 bytes per pass; ip carries the last source word loaded */
360 #ifdef __ARMEB__
361 mov r4, ip, lsl #16
362 #else
363 mov r4, ip, lsr #16
364 #endif
365 ldrt r5, [r0], #0x04
366 pld [r0, #0x018]
367 ldrt r6, [r0], #0x04
368 ldrt r7, [r0], #0x04
369 ldrt ip, [r0], #0x04
370 #ifdef __ARMEB__
371 orr r4, r4, r5, lsr #16
372 mov r5, r5, lsl #16
373 orr r5, r5, r6, lsr #16
374 mov r6, r6, lsl #16
375 orr r6, r6, r7, lsr #16
376 mov r7, r7, lsl #16
377 orr r7, r7, ip, lsr #16
378 #else
379 orr r4, r4, r5, lsl #16
380 mov r5, r5, lsr #16
381 orr r5, r5, r6, lsl #16
382 mov r6, r6, lsr #16
383 orr r6, r6, r7, lsl #16
384 mov r7, r7, lsr #16
385 orr r7, r7, ip, lsl #16
386 #endif
387 str r4, [r1], #0x04
388 str r5, [r1], #0x04
389 str r6, [r1], #0x04
390 str r7, [r1], #0x04
391 .Lcopyin_bad2: /* Loop entry point: test before the first pass */
392 subs r2, r2, #0x10
393 bge .Lcopyin_bad2_loop16
394
395 adds r2, r2, #0x10 /* Adjust for extra sub */
396 ldmfdeq sp!, {r4-r7}
397 RETeq /* Return now if done */
398 subs r2, r2, #0x04
399 sublt r0, r0, #0x02 /* Fewer than 4 left: point r0 at the next uncopied byte */
400 blt .Lcopyin_l4
401
402 .Lcopyin_bad2_loop4: /* One word per pass */
403 #ifdef __ARMEB__
404 mov r4, ip, lsl #16
405 #else
406 mov r4, ip, lsr #16
407 #endif
408 ldrt ip, [r0], #0x04
409 subs r2, r2, #0x04 /* Flags for the bge; orr/str below leave them alone */
410 #ifdef __ARMEB__
411 orr r4, r4, ip, lsr #16
412 #else
413 orr r4, r4, ip, lsl #16
414 #endif
415 str r4, [r1], #0x04
416 bge .Lcopyin_bad2_loop4
417 sub r0, r0, #0x02 /* Point r0 at the next uncopied byte */
418 b .Lcopyin_l4
419
420 .Lcopyin_bad3_loop16: /* 16 bytes per pass; ip carries the last source word loaded */
421 #ifdef __ARMEB__
422 mov r4, ip, lsl #24
423 #else
424 mov r4, ip, lsr #24
425 #endif
426 ldrt r5, [r0], #0x04
427 pld [r0, #0x018]
428 ldrt r6, [r0], #0x04
429 ldrt r7, [r0], #0x04
430 ldrt ip, [r0], #0x04
431 #ifdef __ARMEB__
432 orr r4, r4, r5, lsr #8
433 mov r5, r5, lsl #24
434 orr r5, r5, r6, lsr #8
435 mov r6, r6, lsl #24
436 orr r6, r6, r7, lsr #8
437 mov r7, r7, lsl #24
438 orr r7, r7, ip, lsr #8
439 #else
440 orr r4, r4, r5, lsl #8
441 mov r5, r5, lsr #24
442 orr r5, r5, r6, lsl #8
443 mov r6, r6, lsr #24
444 orr r6, r6, r7, lsl #8
445 mov r7, r7, lsr #24
446 orr r7, r7, ip, lsl #8
447 #endif
448 str r4, [r1], #0x04
449 str r5, [r1], #0x04
450 str r6, [r1], #0x04
451 str r7, [r1], #0x04
452 .Lcopyin_bad3: /* Loop entry point: test before the first pass */
453 subs r2, r2, #0x10
454 bge .Lcopyin_bad3_loop16
455
456 adds r2, r2, #0x10 /* Adjust for extra sub */
457 ldmfdeq sp!, {r4-r7}
458 RETeq /* Return now if done */
459 subs r2, r2, #0x04
460 sublt r0, r0, #0x01 /* Fewer than 4 left: point r0 at the next uncopied byte */
461 blt .Lcopyin_l4
462
463 .Lcopyin_bad3_loop4: /* One word per pass */
464 #ifdef __ARMEB__
465 mov r4, ip, lsl #24
466 #else
467 mov r4, ip, lsr #24
468 #endif
469 ldrt ip, [r0], #0x04
470 subs r2, r2, #0x04 /* Flags for the bge; orr/str below leave them alone */
471 #ifdef __ARMEB__
472 orr r4, r4, ip, lsr #8
473 #else
474 orr r4, r4, ip, lsl #8
475 #endif
476 str r4, [r1], #0x04
477 bge .Lcopyin_bad3_loop4
478 sub r0, r0, #0x01 /* Point r0 at the next uncopied byte */
479
480 .Lcopyin_l4: /* Common tail of the misaligned-source paths */
481 ldmfd sp!, {r4-r7}
482 mov r3, #0x00 /* r4-r7 are no longer on the stack */
483 adds r2, r2, #0x04 /* Adjust for extra sub; r2 = 0-3 bytes left */
484 RETeq
485 .Lcopyin_l4_2: /* Copy the last r2 (1-3) bytes one at a time */
486 rsbs r2, r2, #0x03 /* r2 = number of byte copies to skip (0-2) */
487 addne pc, pc, r2, lsl #3 /* Skip r2 load/store pairs of 8 bytes each; pc reads as this insn + 8 */
488 nop
489 ldrbt ip, [r0], #0x01
490 strb ip, [r1], #0x01
491 ldrbt ip, [r0], #0x01
492 strb ip, [r1], #0x01
493 ldrbt ip, [r0]
494 strb ip, [r1]
495 RET
496 END(copyin)
497
498 /*
499 * r0 = kernel space address (source)
500 * r1 = user space address (destination)
501 * r2 = length in bytes (compared as signed; <= 0 returns 0 without copying)
502 *
503 * Copies bytes from kernel space to user space; r0 = 0 on return, or EFAULT if .Lcopyout_fault runs
504 */
505 ENTRY(copyout)
506 cmp r2, #0x00
507 movle r0, #0x00
508 movle pc, lr /* Bail early if length is <= 0 */
509
510 ldr r3, .L_arm_memcpy /* r3 = address of the hook pointer (label is defined outside this file section) */
511 ldr r3, [r3] /* r3 = hook function pointer */
512 cmp r3, #0
513 beq .Lnormale /* No hook installed: use the code below */
514 ldr r3, .L_min_memcpy_size
515 ldr r3, [r3] /* r3 = smallest length that is handed to the hook */
516 cmp r2, r3
517 blt .Lnormale /* Shorter than that: use the code below */
518 stmfd sp!, {r0-r2, r4, lr} /* Keep the original arguments in case the hook fails */
519 mov r3, r0
520 mov r0, r1 /* Hook is called with r0 = user address ... */
521 mov r1, r3 /* ... and r1 = kernel address */
522 mov r3, #1 /* DST_IS_USER */
523 ldr r4, .L_arm_memcpy
524 mov lr, pc /* pc reads as this insn + 8, i.e. the cmp after the ldr pc */
525 ldr pc, [r4] /* Indirect call through the hook pointer */
526 cmp r0, #0 /* Hook result 0 is treated as success */
527 ldmfd sp!, {r0-r2, r4, lr} /* Restores the arguments; flags are unchanged */
528 moveq r0, #0
529 RETeq
530
531 .Lnormale: /* Also reached by fall-through, arguments restored, when the hook returned non-zero */
532 stmfd sp!, {r10-r11, lr}
533
534 GET_PCB(r10)
535 ldr r10, [r10] /* r10 = base used for the PCB_ONFAULT accesses below */
536
537 mov r3, #0x00 /* r3 = 0: no r4-r9 / r4-r7 saved on the stack yet */
538 adr ip, .Lcopyout_fault
539 ldr r11, [r10, #PCB_ONFAULT] /* r11 = previous PCB_ONFAULT value */
540 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyout_fault for the duration of the copy */
541 bl .Lcopyout_guts
542 str r11, [r10, #PCB_ONFAULT] /* Put the previous value back */
543 mov r0, #0x00 /* Success */
544 ldmfd sp!, {r10-r11, pc}
545
546 .Lcopyout_fault: /* Relies on r3, r10, r11 and sp being as the copy code left them */
547 ldr r0, =EFAULT
548 str r11, [r10, #PCB_ONFAULT] /* Put the previous value back */
549 cmp r3, #0x00 /* Sign of r3 says what .Lcopyout_guts had pushed */
550 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
551 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
552 ldmfd sp!, {r10-r11, pc} /* Return EFAULT to copyout's caller */
553
554 .Lcopyout_guts: /* In: r0 = src (kernel), r1 = dst (user), r2 = length > 0, r3 = 0. User stores use strt/strbt */
555 pld [r0]
556 /* Word-align the destination buffer */
557 ands ip, r1, #0x03 /* Already word aligned? */
558 beq .Lcopyout_wordaligned /* Yup */
559 rsb ip, ip, #0x04 /* ip = bytes needed to reach word alignment (1-3) */
560 cmp r2, ip /* Enough bytes left to align it? */
561 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
562 sub r2, r2, ip
563 rsbs ip, ip, #0x03 /* ip = number of byte copies to skip (0-2) */
564 addne pc, pc, ip, lsl #3 /* Skip ip load/store pairs of 8 bytes each; pc reads as this insn + 8 */
565 nop
566 ldrb ip, [r0], #0x01
567 strbt ip, [r1], #0x01
568 ldrb ip, [r0], #0x01
569 strbt ip, [r1], #0x01
570 ldrb ip, [r0], #0x01
571 strbt ip, [r1], #0x01
572 cmp r2, #0x00 /* All done? */
573 RETeq
574
575 /* Destination buffer is now word aligned */
576 .Lcopyout_wordaligned:
577 ands ip, r0, #0x03 /* Is src also word-aligned? */
578 bne .Lcopyout_bad_align /* Nope. Things just got bad */
579 cmp r2, #0x08 /* Less than 8 bytes remaining? */
580 blt .Lcopyout_w_less_than8
581
582 /* Quad-align the source buffer (it is the side read with ldrd) */
583 tst r0, #0x07 /* Already quad (8-byte) aligned? */
584 ldrne ip, [r0], #0x04 /* No: copy one word so that src becomes 8-byte aligned */
585 subne r2, r2, #0x04
586 strtne ip, [r1], #0x04
587
588 stmfd sp!, {r4-r9} /* Free up some registers */
589 mov r3, #-1 /* Signal restore r4-r9 */
590
591 /* Destination buffer word aligned, source is quad aligned */
592 subs r2, r2, #0x80 /* r2 is kept biased by -0x80 while in the loop */
593 blt .Lcopyout_w_lessthan128
594
595 /* Copy 128 bytes at a time */
596 .Lcopyout_w_loop128:
597 ldrd r4, [r0], #0x08 /* LD:00-07 */
598 pld [r0, #0x18] /* Prefetch 0x20 */
599 ldrd r6, [r0], #0x08 /* LD:08-0f */
600 ldrd r8, [r0], #0x08 /* LD:10-17 */
601 strt r4, [r1], #0x04 /* ST:00-03 */
602 strt r5, [r1], #0x04 /* ST:04-07 */
603 ldrd r4, [r0], #0x08 /* LD:18-1f */
604 strt r6, [r1], #0x04 /* ST:08-0b */
605 strt r7, [r1], #0x04 /* ST:0c-0f */
606 ldrd r6, [r0], #0x08 /* LD:20-27 */
607 pld [r0, #0x18] /* Prefetch 0x40 */
608 strt r8, [r1], #0x04 /* ST:10-13 */
609 strt r9, [r1], #0x04 /* ST:14-17 */
610 ldrd r8, [r0], #0x08 /* LD:28-2f */
611 strt r4, [r1], #0x04 /* ST:18-1b */
612 strt r5, [r1], #0x04 /* ST:1c-1f */
613 ldrd r4, [r0], #0x08 /* LD:30-37 */
614 strt r6, [r1], #0x04 /* ST:20-23 */
615 strt r7, [r1], #0x04 /* ST:24-27 */
616 ldrd r6, [r0], #0x08 /* LD:38-3f */
617 strt r8, [r1], #0x04 /* ST:28-2b */
618 strt r9, [r1], #0x04 /* ST:2c-2f */
619 ldrd r8, [r0], #0x08 /* LD:40-47 */
620 pld [r0, #0x18] /* Prefetch 0x60 */
621 strt r4, [r1], #0x04 /* ST:30-33 */
622 strt r5, [r1], #0x04 /* ST:34-37 */
623 ldrd r4, [r0], #0x08 /* LD:48-4f */
624 strt r6, [r1], #0x04 /* ST:38-3b */
625 strt r7, [r1], #0x04 /* ST:3c-3f */
626 ldrd r6, [r0], #0x08 /* LD:50-57 */
627 strt r8, [r1], #0x04 /* ST:40-43 */
628 strt r9, [r1], #0x04 /* ST:44-47 */
629 ldrd r8, [r0], #0x08 /* LD:58-5f */
630 strt r4, [r1], #0x04 /* ST:48-4b */
631 strt r5, [r1], #0x04 /* ST:4c-4f */
632 ldrd r4, [r0], #0x08 /* LD:60-67 */
633 pld [r0, #0x18] /* Prefetch 0x80 */
634 strt r6, [r1], #0x04 /* ST:50-53 */
635 strt r7, [r1], #0x04 /* ST:54-57 */
636 ldrd r6, [r0], #0x08 /* LD:68-6f */
637 strt r8, [r1], #0x04 /* ST:58-5b */
638 strt r9, [r1], #0x04 /* ST:5c-5f */
639 ldrd r8, [r0], #0x08 /* LD:70-77 */
640 strt r4, [r1], #0x04 /* ST:60-63 */
641 strt r5, [r1], #0x04 /* ST:64-67 */
642 ldrd r4, [r0], #0x08 /* LD:78-7f */
643 strt r6, [r1], #0x04 /* ST:68-6b */
644 strt r7, [r1], #0x04 /* ST:6c-6f */
645 strt r8, [r1], #0x04 /* ST:70-73 */
646 strt r9, [r1], #0x04 /* ST:74-77 */
647 subs r2, r2, #0x80 /* Flags for the bge; the strt pair below leaves them alone */
648 strt r4, [r1], #0x04 /* ST:78-7b */
649 strt r5, [r1], #0x04 /* ST:7c-7f */
650 bge .Lcopyout_w_loop128
651
652 .Lcopyout_w_lessthan128:
653 adds r2, r2, #0x80 /* Adjust for extra sub */
654 ldmfdeq sp!, {r4-r9}
655 RETeq /* Return now if done */
656 subs r2, r2, #0x20 /* r2 is kept biased by -0x20 while in the loop */
657 blt .Lcopyout_w_lessthan32
658
659 /* Copy 32 bytes at a time */
660 .Lcopyout_w_loop32:
661 ldrd r4, [r0], #0x08
662 pld [r0, #0x18]
663 ldrd r6, [r0], #0x08
664 ldrd r8, [r0], #0x08
665 strt r4, [r1], #0x04
666 strt r5, [r1], #0x04
667 ldrd r4, [r0], #0x08
668 strt r6, [r1], #0x04
669 strt r7, [r1], #0x04
670 strt r8, [r1], #0x04
671 strt r9, [r1], #0x04
672 subs r2, r2, #0x20 /* Flags for the bge; the strt pair below leaves them alone */
673 strt r4, [r1], #0x04
674 strt r5, [r1], #0x04
675 bge .Lcopyout_w_loop32
676
677 .Lcopyout_w_lessthan32:
678 adds r2, r2, #0x20 /* Adjust for extra sub */
679 ldmfdeq sp!, {r4-r9}
680 RETeq /* Return now if done */
681
682 and r4, r2, #0x18 /* r4 = bytes to move in 8-byte units (0, 8, 16 or 24) */
683 rsb r5, r4, #0x18 /* r5 = 24 - r4 */
684 subs r2, r2, r4 /* r2 = 0-7 left afterwards; Z is used by the RETeq below */
685 add pc, pc, r5, lsl #1 /* Skip one 16-byte (4-insn) group per 8 bytes not needed */
686 nop
687
688 /* At least 24 bytes remaining */
689 ldrd r4, [r0], #0x08
690 strt r4, [r1], #0x04
691 strt r5, [r1], #0x04
692 nop /* Pads the group to 4 instructions for the computed jump */
693
694 /* At least 16 bytes remaining */
695 ldrd r4, [r0], #0x08
696 strt r4, [r1], #0x04
697 strt r5, [r1], #0x04
698 nop /* Pads the group to 4 instructions for the computed jump */
699
700 /* At least 8 bytes remaining */
701 ldrd r4, [r0], #0x08
702 strt r4, [r1], #0x04
703 strt r5, [r1], #0x04
704 nop /* Pads the group to 4 instructions for the computed jump */
705
706 /* Less than 8 bytes remaining */
707 ldmfd sp!, {r4-r9}
708 RETeq /* Return now if done */
709 mov r3, #0x00 /* r4-r9 are no longer on the stack */
710
711 .Lcopyout_w_less_than8: /* 1-7 bytes left; src and dst are both word aligned */
712 subs r2, r2, #0x04
713 ldrge ip, [r0], #0x04 /* 4 or more left: copy one word */
714 strtge ip, [r1], #0x04
715 RETeq /* Return now if done */
716 addlt r2, r2, #0x04 /* Fewer than 4 left: undo the sub */
717 ldrb ip, [r0], #0x01 /* 1-3 bytes left: first byte */
718 cmp r2, #0x02
719 ldrbge r2, [r0], #0x01 /* Second byte if 2 or 3 left (r2 now holds data) */
720 strbt ip, [r1], #0x01
721 ldrbgt ip, [r0] /* Third byte if 3 left */
722 strbtge r2, [r1], #0x01
723 strbtgt ip, [r1]
724 RET
725
726 /*
727 * At this point, it has not been possible to word align both buffers.
728 * The destination buffer (r1) is word aligned, but the source buffer
729 * (r0) is not. On entry ip = r0 & 3 (1-3).
730 */
731 .Lcopyout_bad_align:
732 stmfd sp!, {r4-r7}
733 mov r3, #0x01 /* Signal restore r4-r7 */
734 bic r0, r0, #0x03 /* Round src down to a word boundary */
735 cmp ip, #2
736 ldr ip, [r0], #0x04 /* ip = word containing the first source bytes; flags unchanged */
737 bgt .Lcopyout_bad3 /* src offset 3 */
738 beq .Lcopyout_bad2 /* src offset 2 */
739 b .Lcopyout_bad1 /* src offset 1 */
740
741 .Lcopyout_bad1_loop16: /* 16 bytes per pass; ip carries the last source word loaded */
742 #ifdef __ARMEB__
743 mov r4, ip, lsl #8
744 #else
745 mov r4, ip, lsr #8
746 #endif
747 ldr r5, [r0], #0x04
748 pld [r0, #0x018]
749 ldr r6, [r0], #0x04
750 ldr r7, [r0], #0x04
751 ldr ip, [r0], #0x04
752 #ifdef __ARMEB__
753 orr r4, r4, r5, lsr #24
754 mov r5, r5, lsl #8
755 orr r5, r5, r6, lsr #24
756 mov r6, r6, lsl #8
757 orr r6, r6, r7, lsr #24
758 mov r7, r7, lsl #8
759 orr r7, r7, ip, lsr #24
760 #else
761 orr r4, r4, r5, lsl #24
762 mov r5, r5, lsr #8
763 orr r5, r5, r6, lsl #24
764 mov r6, r6, lsr #8
765 orr r6, r6, r7, lsl #24
766 mov r7, r7, lsr #8
767 orr r7, r7, ip, lsl #24
768 #endif
769 strt r4, [r1], #0x04
770 strt r5, [r1], #0x04
771 strt r6, [r1], #0x04
772 strt r7, [r1], #0x04
773 .Lcopyout_bad1: /* Loop entry point: test before the first pass */
774 subs r2, r2, #0x10
775 bge .Lcopyout_bad1_loop16
776
777 adds r2, r2, #0x10 /* Adjust for extra sub */
778 ldmfdeq sp!, {r4-r7}
779 RETeq /* Return now if done */
780 subs r2, r2, #0x04
781 sublt r0, r0, #0x03 /* Fewer than 4 left: point r0 at the next uncopied byte */
782 blt .Lcopyout_l4
783
784 .Lcopyout_bad1_loop4: /* One word per pass */
785 #ifdef __ARMEB__
786 mov r4, ip, lsl #8
787 #else
788 mov r4, ip, lsr #8
789 #endif
790 ldr ip, [r0], #0x04
791 subs r2, r2, #0x04 /* Flags for the bge; orr/strt below leave them alone */
792 #ifdef __ARMEB__
793 orr r4, r4, ip, lsr #24
794 #else
795 orr r4, r4, ip, lsl #24
796 #endif
797 strt r4, [r1], #0x04
798 bge .Lcopyout_bad1_loop4
799 sub r0, r0, #0x03 /* Point r0 at the next uncopied byte */
800 b .Lcopyout_l4
801
802 .Lcopyout_bad2_loop16: /* 16 bytes per pass; ip carries the last source word loaded */
803 #ifdef __ARMEB__
804 mov r4, ip, lsl #16
805 #else
806 mov r4, ip, lsr #16
807 #endif
808 ldr r5, [r0], #0x04
809 pld [r0, #0x018]
810 ldr r6, [r0], #0x04
811 ldr r7, [r0], #0x04
812 ldr ip, [r0], #0x04
813 #ifdef __ARMEB__
814 orr r4, r4, r5, lsr #16
815 mov r5, r5, lsl #16
816 orr r5, r5, r6, lsr #16
817 mov r6, r6, lsl #16
818 orr r6, r6, r7, lsr #16
819 mov r7, r7, lsl #16
820 orr r7, r7, ip, lsr #16
821 #else
822 orr r4, r4, r5, lsl #16
823 mov r5, r5, lsr #16
824 orr r5, r5, r6, lsl #16
825 mov r6, r6, lsr #16
826 orr r6, r6, r7, lsl #16
827 mov r7, r7, lsr #16
828 orr r7, r7, ip, lsl #16
829 #endif
830 strt r4, [r1], #0x04
831 strt r5, [r1], #0x04
832 strt r6, [r1], #0x04
833 strt r7, [r1], #0x04
834 .Lcopyout_bad2: /* Loop entry point: test before the first pass */
835 subs r2, r2, #0x10
836 bge .Lcopyout_bad2_loop16
837
838 adds r2, r2, #0x10 /* Adjust for extra sub */
839 ldmfdeq sp!, {r4-r7}
840 RETeq /* Return now if done */
841 subs r2, r2, #0x04
842 sublt r0, r0, #0x02 /* Fewer than 4 left: point r0 at the next uncopied byte */
843 blt .Lcopyout_l4
844
845 .Lcopyout_bad2_loop4: /* One word per pass */
846 #ifdef __ARMEB__
847 mov r4, ip, lsl #16
848 #else
849 mov r4, ip, lsr #16
850 #endif
851 ldr ip, [r0], #0x04
852 subs r2, r2, #0x04 /* Flags for the bge; orr/strt below leave them alone */
853 #ifdef __ARMEB__
854 orr r4, r4, ip, lsr #16
855 #else
856 orr r4, r4, ip, lsl #16
857 #endif
858 strt r4, [r1], #0x04
859 bge .Lcopyout_bad2_loop4
860 sub r0, r0, #0x02 /* Point r0 at the next uncopied byte */
861 b .Lcopyout_l4
862
863 .Lcopyout_bad3_loop16: /* 16 bytes per pass; ip carries the last source word loaded */
864 #ifdef __ARMEB__
865 mov r4, ip, lsl #24
866 #else
867 mov r4, ip, lsr #24
868 #endif
869 ldr r5, [r0], #0x04
870 pld [r0, #0x018]
871 ldr r6, [r0], #0x04
872 ldr r7, [r0], #0x04
873 ldr ip, [r0], #0x04
874 #ifdef __ARMEB__
875 orr r4, r4, r5, lsr #8
876 mov r5, r5, lsl #24
877 orr r5, r5, r6, lsr #8
878 mov r6, r6, lsl #24
879 orr r6, r6, r7, lsr #8
880 mov r7, r7, lsl #24
881 orr r7, r7, ip, lsr #8
882 #else
883 orr r4, r4, r5, lsl #8
884 mov r5, r5, lsr #24
885 orr r5, r5, r6, lsl #8
886 mov r6, r6, lsr #24
887 orr r6, r6, r7, lsl #8
888 mov r7, r7, lsr #24
889 orr r7, r7, ip, lsl #8
890 #endif
891 strt r4, [r1], #0x04
892 strt r5, [r1], #0x04
893 strt r6, [r1], #0x04
894 strt r7, [r1], #0x04
895 .Lcopyout_bad3: /* Loop entry point: test before the first pass */
896 subs r2, r2, #0x10
897 bge .Lcopyout_bad3_loop16
898
899 adds r2, r2, #0x10 /* Adjust for extra sub */
900 ldmfdeq sp!, {r4-r7}
901 RETeq /* Return now if done */
902 subs r2, r2, #0x04
903 sublt r0, r0, #0x01 /* Fewer than 4 left: point r0 at the next uncopied byte */
904 blt .Lcopyout_l4
905
906 .Lcopyout_bad3_loop4: /* One word per pass */
907 #ifdef __ARMEB__
908 mov r4, ip, lsl #24
909 #else
910 mov r4, ip, lsr #24
911 #endif
912 ldr ip, [r0], #0x04
913 subs r2, r2, #0x04 /* Flags for the bge; orr/strt below leave them alone */
914 #ifdef __ARMEB__
915 orr r4, r4, ip, lsr #8
916 #else
917 orr r4, r4, ip, lsl #8
918 #endif
919 strt r4, [r1], #0x04
920 bge .Lcopyout_bad3_loop4
921 sub r0, r0, #0x01 /* Point r0 at the next uncopied byte */
922
923 .Lcopyout_l4: /* Common tail of the misaligned-source paths */
924 ldmfd sp!, {r4-r7}
925 mov r3, #0x00 /* r4-r7 are no longer on the stack */
926 adds r2, r2, #0x04 /* Adjust for extra sub; r2 = 0-3 bytes left */
927 RETeq
928 .Lcopyout_l4_2: /* Copy the last r2 (1-3) bytes one at a time */
929 rsbs r2, r2, #0x03 /* r2 = number of byte copies to skip (0-2) */
930 addne pc, pc, r2, lsl #3 /* Skip r2 load/store pairs of 8 bytes each; pc reads as this insn + 8 */
931 nop
932 ldrb ip, [r0], #0x01
933 strbt ip, [r1], #0x01
934 ldrb ip, [r0], #0x01
935 strbt ip, [r1], #0x01
936 ldrb ip, [r0]
937 strbt ip, [r1]
938 RET
939 END(copyout)
940
Cache object: 81f11565164ab3ee494b7a0fc538d8d3
|