1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
2
3 /*-
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
40
41 .syntax unified
42 .text
43 .align 2
44
/*
 * GET_PCB(tmp)
 *
 * Reads CP15 register c13, c0, opcode2 4 into `tmp' and adds the TD_PCB
 * offset to it, leaving an address in `tmp'.  Both users in this file
 * follow the macro with `ldr r10, [r10]' and then index the loaded value
 * with PCB_ONFAULT.
 * NOTE(review): presumably the CP15 register holds the current thread
 * pointer and TD_PCB is the offset of its PCB pointer -- confirm against
 * the definitions, which are not in this file.
 */
45 #define GET_PCB(tmp) \
46 mrc p15, 0, tmp, c13, c0, 4; \
47 add tmp, tmp, #(TD_PCB)
48
49 /*
50 * r0 = user space address
51 * r1 = kernel space address
52 * r2 = length
53 *
54 * Copies bytes from user space to kernel space
 *
 * Result in r0:
 *   0      - copy completed, or length was <= 0 (signed compare)
 *   EFAULT - r0 + r2 wrapped, r0 + r2 >= VM_MAXUSER_ADDRESS + 1, or
 *            .Lcopyin_fault was entered during the copy
 *
 * When the pointer stored at the location named by .L_arm_memcpy is
 * non-NULL and the length is at least the value found via
 * .L_min_memcpy_size, the copy is first handed to that routine with
 * r3 = 2 (SRC_IS_USER); a zero return ends the call, anything else
 * falls through to the generic code at .Lnormal.  Neither literal is
 * defined in the part of the file visible here.
55 */
56 ENTRY(copyin)
57 cmp r2, #0x00
58 movle r0, #0x00
59 movle pc, lr /* Bail early if length is <= 0 */
60
61 adds r3, r0, r2 /* r3 = end of the user range; C set if it wrapped */
62 movcs r0, #EFAULT
63 RETc(cs) /* Range wraps around the address space */
64
65 ldr r12, =(VM_MAXUSER_ADDRESS + 1)
66 cmp r3, r12 /* Does the range end at or above the limit? */
67 movcs r0, #EFAULT
68 RETc(cs)
69
70 ldr r3, .L_arm_memcpy /* r3 = address of the hook pointer */
71 ldr r3, [r3] /* r3 = hook pointer itself */
72 cmp r3, #0
73 beq .Lnormal /* No hook installed */
74 ldr r3, .L_min_memcpy_size
75 ldr r3, [r3]
76 cmp r2, r3
77 blt .Lnormal /* Shorter than the hook's minimum size */
78 stmfd sp!, {r0-r2, r4, lr} /* Keep the arguments for a possible fallback */
79 mov r3, r0 /* Swap r0/r1: the hook gets r0 = kernel, r1 = user */
80 mov r0, r1
81 mov r1, r3
82 mov r3, #2 /* SRC_IS_USER */
83 ldr r4, .L_arm_memcpy
84 mov lr, pc /* pc reads two instructions ahead: lr = the cmp below */
85 ldr pc, [r4] /* Indirect call through the hook pointer */
86 cmp r0, #0 /* Zero return is treated as success */
87 ldmfd sp!, {r0-r2, r4, lr} /* Restore the arguments; flags are untouched */
88 moveq r0, #0
89 RETeq
 /* Non-zero return: fall through and do the copy here */
90
91 .Lnormal:
92 stmfd sp!, {r10-r11, lr}
93
94 GET_PCB(r10)
95 ldr r10, [r10] /* r10 = base used for the PCB_ONFAULT accesses */
96
97 mov r3, #0x00 /* r3 = 0: nothing extra pushed yet (see .Lcopyin_fault) */
98 adr ip, .Lcopyin_fault
99 ldr r11, [r10, #PCB_ONFAULT] /* Remember the previous onfault value */
100 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyin_fault for the copy */
101 bl .Lcopyin_guts
102 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back */
103 mov r0, #0x00 /* Success */
104 ldmfd sp!, {r10-r11, pc}
105
 /*
 * Fault exit.  Restores PCB_ONFAULT, pops whatever .Lcopyin_guts had
 * pushed (described by r3), then unwinds the frame built at .Lnormal
 * and returns EFAULT.
 * NOTE(review): presumably entered by the abort handler through
 * PCB_ONFAULT; that code is not in this file.
 */
106 .Lcopyin_fault:
107 ldr r0, =EFAULT
108 str r11, [r10, #PCB_ONFAULT]
109 cmp r3, #0x00 /* r3 describes what is still on the stack */
110 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
111 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
112 ldmfd sp!, {r10-r11, pc}
113
/*
 * .Lcopyin_guts -- copy engine, entered with `bl' from copyin and left
 * with RET.
 *
 *	r0 = source (user; always read with ldrt/ldrbt)
 *	r1 = destination (kernel)
 *	r2 = bytes remaining (> 0 on entry)
 *	r3 = what is currently pushed on the stack, for .Lcopyin_fault:
 *	     0 nothing, -1 r4-r9, 1 r4-r7
 *	ip = scratch
 *
 * The computed jumps (`add pc, pc, ...') depend on the exact number of
 * instructions that follow them; do not insert or remove instructions
 * in those sequences.
 */
114 .Lcopyin_guts:
115 pld [r0]
116 /* Word-align the destination buffer */
117 ands ip, r1, #0x03 /* Already word aligned? */
118 beq .Lcopyin_wordaligned /* Yup */
119 rsb ip, ip, #0x04 /* ip = bytes needed to reach alignment (1..3) */
120 cmp r2, ip /* Enough bytes left to align it? */
121 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
122 sub r2, r2, ip
123 rsbs ip, ip, #0x03 /* ip = number of byte copies to skip (0..2) */
124 addne pc, pc, ip, lsl #3 /* Skip ip load/store pairs (8 bytes each) */
125 nop /* Pad: pc reads 8 bytes past the addne */
126 ldrbt ip, [r0], #0x01
127 strb ip, [r1], #0x01
128 ldrbt ip, [r0], #0x01
129 strb ip, [r1], #0x01
130 ldrbt ip, [r0], #0x01
131 strb ip, [r1], #0x01
132 cmp r2, #0x00 /* All done? */
133 RETeq
134
135 /* Destination buffer is now word aligned */
136 .Lcopyin_wordaligned:
137 ands ip, r0, #0x03 /* Is src also word-aligned? */
138 bne .Lcopyin_bad_align /* Nope. Things just got bad */
139 cmp r2, #0x08 /* Less than 8 bytes remaining? */
140 blt .Lcopyin_w_less_than8
141
142 /* Quad-align the destination buffer */
143 tst r1, #0x07 /* Already quad aligned? */
144 ldrtne ip, [r0], #0x04 /* No: copy one word to get there */
145 strne ip, [r1], #0x04
146 subne r2, r2, #0x04
147 stmfd sp!, {r4-r9} /* Free up some registers */
148 mov r3, #-1 /* Signal restore r4-r9 */
149
150 /* Destination buffer quad aligned, source is word aligned */
151 subs r2, r2, #0x80 /* Bias r2 by -128 for the loop test */
152 blt .Lcopyin_w_lessthan128
153
154 /* Copy 128 bytes at a time */
155 .Lcopyin_w_loop128:
156 ldrt r4, [r0], #0x04 /* LD:00-03 */
157 ldrt r5, [r0], #0x04 /* LD:04-07 */
158 pld [r0, #0x18] /* Prefetch 0x20 */
159 ldrt r6, [r0], #0x04 /* LD:08-0b */
160 ldrt r7, [r0], #0x04 /* LD:0c-0f */
161 ldrt r8, [r0], #0x04 /* LD:10-13 */
162 ldrt r9, [r0], #0x04 /* LD:14-17 */
163 strd r4, [r1], #0x08 /* ST:00-07 */
164 ldrt r4, [r0], #0x04 /* LD:18-1b */
165 ldrt r5, [r0], #0x04 /* LD:1c-1f */
166 strd r6, [r1], #0x08 /* ST:08-0f */
167 ldrt r6, [r0], #0x04 /* LD:20-23 */
168 ldrt r7, [r0], #0x04 /* LD:24-27 */
169 pld [r0, #0x18] /* Prefetch 0x40 */
170 strd r8, [r1], #0x08 /* ST:10-17 */
171 ldrt r8, [r0], #0x04 /* LD:28-2b */
172 ldrt r9, [r0], #0x04 /* LD:2c-2f */
173 strd r4, [r1], #0x08 /* ST:18-1f */
174 ldrt r4, [r0], #0x04 /* LD:30-33 */
175 ldrt r5, [r0], #0x04 /* LD:34-37 */
176 strd r6, [r1], #0x08 /* ST:20-27 */
177 ldrt r6, [r0], #0x04 /* LD:38-3b */
178 ldrt r7, [r0], #0x04 /* LD:3c-3f */
179 strd r8, [r1], #0x08 /* ST:28-2f */
180 ldrt r8, [r0], #0x04 /* LD:40-43 */
181 ldrt r9, [r0], #0x04 /* LD:44-47 */
182 pld [r0, #0x18] /* Prefetch 0x60 */
183 strd r4, [r1], #0x08 /* ST:30-37 */
184 ldrt r4, [r0], #0x04 /* LD:48-4b */
185 ldrt r5, [r0], #0x04 /* LD:4c-4f */
186 strd r6, [r1], #0x08 /* ST:38-3f */
187 ldrt r6, [r0], #0x04 /* LD:50-53 */
188 ldrt r7, [r0], #0x04 /* LD:54-57 */
189 strd r8, [r1], #0x08 /* ST:40-47 */
190 ldrt r8, [r0], #0x04 /* LD:58-5b */
191 ldrt r9, [r0], #0x04 /* LD:5c-5f */
192 strd r4, [r1], #0x08 /* ST:48-4f */
193 ldrt r4, [r0], #0x04 /* LD:60-63 */
194 ldrt r5, [r0], #0x04 /* LD:64-67 */
195 pld [r0, #0x18] /* Prefetch 0x80 */
196 strd r6, [r1], #0x08 /* ST:50-57 */
197 ldrt r6, [r0], #0x04 /* LD:68-6b */
198 ldrt r7, [r0], #0x04 /* LD:6c-6f */
199 strd r8, [r1], #0x08 /* ST:58-5f */
200 ldrt r8, [r0], #0x04 /* LD:70-73 */
201 ldrt r9, [r0], #0x04 /* LD:74-77 */
202 strd r4, [r1], #0x08 /* ST:60-67 */
203 ldrt r4, [r0], #0x04 /* LD:78-7b */
204 ldrt r5, [r0], #0x04 /* LD:7c-7f */
205 strd r6, [r1], #0x08 /* ST:68-6f */
206 strd r8, [r1], #0x08 /* ST:70-77 */
207 subs r2, r2, #0x80 /* Another full 128 bytes left? (flags used by bge) */
208 strd r4, [r1], #0x08 /* ST:78-7f */
209 bge .Lcopyin_w_loop128
210
211 .Lcopyin_w_lessthan128:
212 adds r2, r2, #0x80 /* Adjust for extra sub */
213 ldmfdeq sp!, {r4-r9}
214 RETeq /* Return now if done */
215 subs r2, r2, #0x20 /* Bias r2 by -32 for the loop test */
216 blt .Lcopyin_w_lessthan32
217
218 /* Copy 32 bytes at a time */
219 .Lcopyin_w_loop32:
220 ldrt r4, [r0], #0x04
221 ldrt r5, [r0], #0x04
222 pld [r0, #0x18]
223 ldrt r6, [r0], #0x04
224 ldrt r7, [r0], #0x04
225 ldrt r8, [r0], #0x04
226 ldrt r9, [r0], #0x04
227 strd r4, [r1], #0x08
228 ldrt r4, [r0], #0x04
229 ldrt r5, [r0], #0x04
230 strd r6, [r1], #0x08
231 strd r8, [r1], #0x08
232 subs r2, r2, #0x20
233 strd r4, [r1], #0x08
234 bge .Lcopyin_w_loop32
235
236 .Lcopyin_w_lessthan32:
237 adds r2, r2, #0x20 /* Adjust for extra sub */
238 ldmfdeq sp!, {r4-r9}
239 RETeq /* Return now if done */
240
241 and r4, r2, #0x18 /* r4 = bytes in whole 8-byte chunks (0/8/16/24) */
242 rsb r5, r4, #0x18 /* r5 = bytes' worth of chunks to skip below */
243 subs r2, r2, r4 /* r2 = tail (0..7); Z is tested by the RETeq below */
244 add pc, pc, r5, lsl #1 /* Each 8-byte chunk below is 4 insns = 16 bytes */
245 nop /* Pad: pc reads 8 bytes past the add */
246
247 /* At least 24 bytes remaining */
248 ldrt r4, [r0], #0x04
249 ldrt r5, [r0], #0x04
250 nop /* Keeps the chunk at 4 instructions */
251 strd r4, [r1], #0x08
252
253 /* At least 16 bytes remaining */
254 ldrt r4, [r0], #0x04
255 ldrt r5, [r0], #0x04
256 nop /* Keeps the chunk at 4 instructions */
257 strd r4, [r1], #0x08
258
259 /* At least 8 bytes remaining */
260 ldrt r4, [r0], #0x04
261 ldrt r5, [r0], #0x04
262 nop /* Keeps the chunk at 4 instructions */
263 strd r4, [r1], #0x08
264
265 /* Less than 8 bytes remaining */
266 ldmfd sp!, {r4-r9}
267 RETeq /* Return now if done */
268 mov r3, #0x00 /* r4-r9 are popped; nothing for the fault path to restore */
269
270 .Lcopyin_w_less_than8:
271 subs r2, r2, #0x04
272 ldrtge ip, [r0], #0x04 /* At least 4 bytes: copy one word */
273 strge ip, [r1], #0x04
274 RETeq /* Return now if done */
275 addlt r2, r2, #0x04 /* Fewer than 4: undo the subtract */
276 ldrbt ip, [r0], #0x01 /* 1..3 bytes left: first byte */
277 cmp r2, #0x02 /* Flags now carry the count; r2 is free */
278 ldrbtge r2, [r0], #0x01 /* Second byte, if 2 or 3 remain */
279 strb ip, [r1], #0x01
280 ldrbtgt ip, [r0] /* Third byte, if 3 remain */
281 strbge r2, [r1], #0x01
282 strbgt ip, [r1]
283 RET
284
285 /*
286 * At this point, it has not been possible to word align both buffers.
287 * The destination buffer (r1) is word aligned, but the source buffer
288 * (r0) is not.
 *
 * ip holds the source misalignment (1..3).  The source pointer is rounded
 * down to a word boundary and whole words are loaded; each destination
 * word is then assembled from the tail of the previous source word
 * (kept in ip) and the head of the next one.  The shift directions used
 * (lsr on the older word, lsl on the newer) correspond to a little-endian
 * byte order.
 * r4-r7 are pushed here and r3 is set to 1 so .Lcopyin_fault pops them.
 * Inside the loops r2 is biased (by -16, then -4) so the sign of the
 * `subs' result drives the loop branches.
289 */
290 .Lcopyin_bad_align:
291 stmfd sp!, {r4-r7}
292 mov r3, #0x01 /* Signal restore r4-r7 */
293 bic r0, r0, #0x03 /* Round the source down to a word boundary */
294 cmp ip, #2 /* Dispatch on the misalignment (flags survive the ldrt) */
295 ldrt ip, [r0], #0x04 /* Prime ip with the first source word */
296 bgt .Lcopyin_bad3
297 beq .Lcopyin_bad2
298 b .Lcopyin_bad1
299
 /* Source is 1 byte past a word boundary: 3 bytes from ip, 1 from the next word */
300 .Lcopyin_bad1_loop16:
301 mov r4, ip, lsr #8
302 ldrt r5, [r0], #0x04
303 pld [r0, #0x018]
304 ldrt r6, [r0], #0x04
305 ldrt r7, [r0], #0x04
306 ldrt ip, [r0], #0x04
307 orr r4, r4, r5, lsl #24
308 mov r5, r5, lsr #8
309 orr r5, r5, r6, lsl #24
310 mov r6, r6, lsr #8
311 orr r6, r6, r7, lsl #24
312 mov r7, r7, lsr #8
313 orr r7, r7, ip, lsl #24
314 str r4, [r1], #0x04
315 str r5, [r1], #0x04
316 str r6, [r1], #0x04
317 str r7, [r1], #0x04
318 .Lcopyin_bad1:
319 subs r2, r2, #0x10
320 bge .Lcopyin_bad1_loop16
321
322 adds r2, r2, #0x10 /* Undo the extra subtract */
323 ldmfdeq sp!, {r4-r7}
324 RETeq /* Return now if done */
325 subs r2, r2, #0x04
326 sublt r0, r0, #0x03 /* Point r0 back at the first unconsumed byte */
327 blt .Lcopyin_l4
328
329 .Lcopyin_bad1_loop4:
330 mov r4, ip, lsr #8
331 ldrt ip, [r0], #0x04
332 subs r2, r2, #0x04
333 orr r4, r4, ip, lsl #24
334 str r4, [r1], #0x04
335 bge .Lcopyin_bad1_loop4
336 sub r0, r0, #0x03 /* Point r0 back at the first unconsumed byte */
337 b .Lcopyin_l4
338
 /* Source is 2 bytes past a word boundary: 2 bytes from ip, 2 from the next word */
339 .Lcopyin_bad2_loop16:
340 mov r4, ip, lsr #16
341 ldrt r5, [r0], #0x04
342 pld [r0, #0x018]
343 ldrt r6, [r0], #0x04
344 ldrt r7, [r0], #0x04
345 ldrt ip, [r0], #0x04
346 orr r4, r4, r5, lsl #16
347 mov r5, r5, lsr #16
348 orr r5, r5, r6, lsl #16
349 mov r6, r6, lsr #16
350 orr r6, r6, r7, lsl #16
351 mov r7, r7, lsr #16
352 orr r7, r7, ip, lsl #16
353 str r4, [r1], #0x04
354 str r5, [r1], #0x04
355 str r6, [r1], #0x04
356 str r7, [r1], #0x04
357 .Lcopyin_bad2:
358 subs r2, r2, #0x10
359 bge .Lcopyin_bad2_loop16
360
361 adds r2, r2, #0x10 /* Undo the extra subtract */
362 ldmfdeq sp!, {r4-r7}
363 RETeq /* Return now if done */
364 subs r2, r2, #0x04
365 sublt r0, r0, #0x02 /* Point r0 back at the first unconsumed byte */
366 blt .Lcopyin_l4
367
368 .Lcopyin_bad2_loop4:
369 mov r4, ip, lsr #16
370 ldrt ip, [r0], #0x04
371 subs r2, r2, #0x04
372 orr r4, r4, ip, lsl #16
373 str r4, [r1], #0x04
374 bge .Lcopyin_bad2_loop4
375 sub r0, r0, #0x02 /* Point r0 back at the first unconsumed byte */
376 b .Lcopyin_l4
377
 /* Source is 3 bytes past a word boundary: 1 byte from ip, 3 from the next word */
378 .Lcopyin_bad3_loop16:
379 mov r4, ip, lsr #24
380 ldrt r5, [r0], #0x04
381 pld [r0, #0x018]
382 ldrt r6, [r0], #0x04
383 ldrt r7, [r0], #0x04
384 ldrt ip, [r0], #0x04
385 orr r4, r4, r5, lsl #8
386 mov r5, r5, lsr #24
387 orr r5, r5, r6, lsl #8
388 mov r6, r6, lsr #24
389 orr r6, r6, r7, lsl #8
390 mov r7, r7, lsr #24
391 orr r7, r7, ip, lsl #8
392 str r4, [r1], #0x04
393 str r5, [r1], #0x04
394 str r6, [r1], #0x04
395 str r7, [r1], #0x04
396 .Lcopyin_bad3:
397 subs r2, r2, #0x10
398 bge .Lcopyin_bad3_loop16
399
400 adds r2, r2, #0x10 /* Undo the extra subtract */
401 ldmfdeq sp!, {r4-r7}
402 RETeq /* Return now if done */
403 subs r2, r2, #0x04
404 sublt r0, r0, #0x01 /* Point r0 back at the first unconsumed byte */
405 blt .Lcopyin_l4
406
407 .Lcopyin_bad3_loop4:
408 mov r4, ip, lsr #24
409 ldrt ip, [r0], #0x04
410 subs r2, r2, #0x04
411 orr r4, r4, ip, lsl #8
412 str r4, [r1], #0x04
413 bge .Lcopyin_bad3_loop4
414 sub r0, r0, #0x01 /* Point r0 back at the first unconsumed byte */
415
 /* Common tail for the unaligned paths: at most 3 bytes remain */
416 .Lcopyin_l4:
417 ldmfd sp!, {r4-r7}
418 mov r3, #0x00 /* r4-r7 are popped; nothing for the fault path to restore */
419 adds r2, r2, #0x04 /* Undo the last subtract of 4: r2 = 0..3 */
420 RETeq
 /* Also entered directly from .Lcopyin_guts when too few bytes remain to align */
421 .Lcopyin_l4_2:
422 rsbs r2, r2, #0x03 /* r2 = number of byte copies to skip */
423 addne pc, pc, r2, lsl #3 /* Skip r2 load/store pairs (8 bytes each) */
424 nop /* Pad: pc reads 8 bytes past the addne */
425 ldrbt ip, [r0], #0x01
426 strb ip, [r1], #0x01
427 ldrbt ip, [r0], #0x01
428 strb ip, [r1], #0x01
429 ldrbt ip, [r0]
430 strb ip, [r1]
431 RET
432 END(copyin)
433
434 /*
435 * r0 = kernel space address
436 * r1 = user space address
437 * r2 = length
438 *
439 * Copies bytes from kernel space to user space
 *
 * Result in r0:
 *   0      - copy completed, or length was <= 0 (signed compare)
 *   EFAULT - r1 + r2 wrapped, r1 + r2 >= VM_MAXUSER_ADDRESS + 1, or
 *            .Lcopyout_fault was entered during the copy
 *
 * When the pointer stored at the location named by .L_arm_memcpy is
 * non-NULL and the length is at least the value found via
 * .L_min_memcpy_size, the copy is first handed to that routine with
 * r3 = 1 (DST_IS_USER); a zero return ends the call, anything else
 * falls through to the generic code at .Lnormale.  Neither literal is
 * defined in the part of the file visible here.
440 */
441 ENTRY(copyout)
442 cmp r2, #0x00
443 movle r0, #0x00
444 movle pc, lr /* Bail early if length is <= 0 */
445
446 adds r3, r1, r2 /* r3 = end of the user range; C set if it wrapped */
447 movcs r0, #EFAULT
448 RETc(cs) /* Range wraps around the address space */
449
450 ldr r12, =(VM_MAXUSER_ADDRESS + 1)
451 cmp r3, r12 /* Does the range end at or above the limit? */
452 movcs r0, #EFAULT
453 RETc(cs)
454
455 ldr r3, .L_arm_memcpy /* r3 = address of the hook pointer */
456 ldr r3, [r3] /* r3 = hook pointer itself */
457 cmp r3, #0
458 beq .Lnormale /* No hook installed */
459 ldr r3, .L_min_memcpy_size
460 ldr r3, [r3]
461 cmp r2, r3
462 blt .Lnormale /* Shorter than the hook's minimum size */
463 stmfd sp!, {r0-r2, r4, lr} /* Keep the arguments for a possible fallback */
464 mov r3, r0 /* Swap r0/r1: the hook gets r0 = user, r1 = kernel */
465 mov r0, r1
466 mov r1, r3
467 mov r3, #1 /* DST_IS_USER */
468 ldr r4, .L_arm_memcpy
469 mov lr, pc /* pc reads two instructions ahead: lr = the cmp below */
470 ldr pc, [r4] /* Indirect call through the hook pointer */
471 cmp r0, #0 /* Zero return is treated as success */
472 ldmfd sp!, {r0-r2, r4, lr} /* Restore the arguments; flags are untouched */
473 moveq r0, #0
474 RETeq
 /* Non-zero return: fall through and do the copy here */
475
476 .Lnormale:
477 stmfd sp!, {r10-r11, lr}
478
479 GET_PCB(r10)
480 ldr r10, [r10] /* r10 = base used for the PCB_ONFAULT accesses */
481
482 mov r3, #0x00 /* r3 = 0: nothing extra pushed yet (see .Lcopyout_fault) */
483 adr ip, .Lcopyout_fault
484 ldr r11, [r10, #PCB_ONFAULT] /* Remember the previous onfault value */
485 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyout_fault for the copy */
486 bl .Lcopyout_guts
487 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back */
488 mov r0, #0x00 /* Success */
489 ldmfd sp!, {r10-r11, pc}
490
 /*
 * Fault exit.  Restores PCB_ONFAULT, pops whatever .Lcopyout_guts had
 * pushed (described by r3), then unwinds the frame built at .Lnormale
 * and returns EFAULT.
 * NOTE(review): presumably entered by the abort handler through
 * PCB_ONFAULT; that code is not in this file.
 */
491 .Lcopyout_fault:
492 ldr r0, =EFAULT
493 str r11, [r10, #PCB_ONFAULT]
494 cmp r3, #0x00 /* r3 describes what is still on the stack */
495 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
496 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
497 ldmfd sp!, {r10-r11, pc}
498
/*
 * .Lcopyout_guts -- copy engine, entered with `bl' from copyout and left
 * with RET.
 *
 *	r0 = source (kernel)
 *	r1 = destination (user; always written with strt/strbt)
 *	r2 = bytes remaining (> 0 on entry)
 *	r3 = what is currently pushed on the stack, for .Lcopyout_fault:
 *	     0 nothing, -1 r4-r9, 1 r4-r7
 *	ip = scratch
 *
 * The computed jumps (`add pc, pc, ...') depend on the exact number of
 * instructions that follow them; do not insert or remove instructions
 * in those sequences.
 */
499 .Lcopyout_guts:
500 pld [r0]
501 /* Word-align the destination buffer */
502 ands ip, r1, #0x03 /* Already word aligned? */
503 beq .Lcopyout_wordaligned /* Yup */
504 rsb ip, ip, #0x04 /* ip = bytes needed to reach alignment (1..3) */
505 cmp r2, ip /* Enough bytes left to align it? */
506 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
507 sub r2, r2, ip
508 rsbs ip, ip, #0x03 /* ip = number of byte copies to skip (0..2) */
509 addne pc, pc, ip, lsl #3 /* Skip ip load/store pairs (8 bytes each) */
510 nop /* Pad: pc reads 8 bytes past the addne */
511 ldrb ip, [r0], #0x01
512 strbt ip, [r1], #0x01
513 ldrb ip, [r0], #0x01
514 strbt ip, [r1], #0x01
515 ldrb ip, [r0], #0x01
516 strbt ip, [r1], #0x01
517 cmp r2, #0x00 /* All done? */
518 RETeq
519
520 /* Destination buffer is now word aligned */
521 .Lcopyout_wordaligned:
522 ands ip, r0, #0x03 /* Is src also word-aligned? */
523 bne .Lcopyout_bad_align /* Nope. Things just got bad */
524 cmp r2, #0x08 /* Less than 8 bytes remaining? */
525 blt .Lcopyout_w_less_than8
526
527 /* Quad-align the source buffer (it is read with ldrd below) */
528 tst r0, #0x07 /* Already quad aligned? */
529 ldrne ip, [r0], #0x04 /* No: copy one word to get there */
530 subne r2, r2, #0x04
531 strtne ip, [r1], #0x04
532
533 stmfd sp!, {r4-r9} /* Free up some registers */
534 mov r3, #-1 /* Signal restore r4-r9 */
535
536 /* Destination buffer word aligned, source is quad aligned */
537 subs r2, r2, #0x80 /* Bias r2 by -128 for the loop test */
538 blt .Lcopyout_w_lessthan128
539
540 /* Copy 128 bytes at a time */
541 .Lcopyout_w_loop128:
542 ldrd r4, [r0], #0x08 /* LD:00-07 */
543 pld [r0, #0x18] /* Prefetch 0x20 */
544 ldrd r6, [r0], #0x08 /* LD:08-0f */
545 ldrd r8, [r0], #0x08 /* LD:10-17 */
546 strt r4, [r1], #0x04 /* ST:00-03 */
547 strt r5, [r1], #0x04 /* ST:04-07 */
548 ldrd r4, [r0], #0x08 /* LD:18-1f */
549 strt r6, [r1], #0x04 /* ST:08-0b */
550 strt r7, [r1], #0x04 /* ST:0c-0f */
551 ldrd r6, [r0], #0x08 /* LD:20-27 */
552 pld [r0, #0x18] /* Prefetch 0x40 */
553 strt r8, [r1], #0x04 /* ST:10-13 */
554 strt r9, [r1], #0x04 /* ST:14-17 */
555 ldrd r8, [r0], #0x08 /* LD:28-2f */
556 strt r4, [r1], #0x04 /* ST:18-1b */
557 strt r5, [r1], #0x04 /* ST:1c-1f */
558 ldrd r4, [r0], #0x08 /* LD:30-37 */
559 strt r6, [r1], #0x04 /* ST:20-23 */
560 strt r7, [r1], #0x04 /* ST:24-27 */
561 ldrd r6, [r0], #0x08 /* LD:38-3f */
562 strt r8, [r1], #0x04 /* ST:28-2b */
563 strt r9, [r1], #0x04 /* ST:2c-2f */
564 ldrd r8, [r0], #0x08 /* LD:40-47 */
565 pld [r0, #0x18] /* Prefetch 0x60 */
566 strt r4, [r1], #0x04 /* ST:30-33 */
567 strt r5, [r1], #0x04 /* ST:34-37 */
568 ldrd r4, [r0], #0x08 /* LD:48-4f */
569 strt r6, [r1], #0x04 /* ST:38-3b */
570 strt r7, [r1], #0x04 /* ST:3c-3f */
571 ldrd r6, [r0], #0x08 /* LD:50-57 */
572 strt r8, [r1], #0x04 /* ST:40-43 */
573 strt r9, [r1], #0x04 /* ST:44-47 */
574 ldrd r8, [r0], #0x08 /* LD:58-5f */
575 strt r4, [r1], #0x04 /* ST:48-4b */
576 strt r5, [r1], #0x04 /* ST:4c-4f */
577 ldrd r4, [r0], #0x08 /* LD:60-67 */
578 pld [r0, #0x18] /* Prefetch 0x80 */
579 strt r6, [r1], #0x04 /* ST:50-53 */
580 strt r7, [r1], #0x04 /* ST:54-57 */
581 ldrd r6, [r0], #0x08 /* LD:68-6f */
582 strt r8, [r1], #0x04 /* ST:58-5b */
583 strt r9, [r1], #0x04 /* ST:5c-5f */
584 ldrd r8, [r0], #0x08 /* LD:70-77 */
585 strt r4, [r1], #0x04 /* ST:60-63 */
586 strt r5, [r1], #0x04 /* ST:64-67 */
587 ldrd r4, [r0], #0x08 /* LD:78-7f */
588 strt r6, [r1], #0x04 /* ST:68-6b */
589 strt r7, [r1], #0x04 /* ST:6c-6f */
590 strt r8, [r1], #0x04 /* ST:70-73 */
591 strt r9, [r1], #0x04 /* ST:74-77 */
592 subs r2, r2, #0x80 /* Another full 128 bytes left? (flags used by bge) */
593 strt r4, [r1], #0x04 /* ST:78-7b */
594 strt r5, [r1], #0x04 /* ST:7c-7f */
595 bge .Lcopyout_w_loop128
596
597 .Lcopyout_w_lessthan128:
598 adds r2, r2, #0x80 /* Adjust for extra sub */
599 ldmfdeq sp!, {r4-r9}
600 RETeq /* Return now if done */
601 subs r2, r2, #0x20 /* Bias r2 by -32 for the loop test */
602 blt .Lcopyout_w_lessthan32
603
604 /* Copy 32 bytes at a time */
605 .Lcopyout_w_loop32:
606 ldrd r4, [r0], #0x08
607 pld [r0, #0x18]
608 ldrd r6, [r0], #0x08
609 ldrd r8, [r0], #0x08
610 strt r4, [r1], #0x04
611 strt r5, [r1], #0x04
612 ldrd r4, [r0], #0x08
613 strt r6, [r1], #0x04
614 strt r7, [r1], #0x04
615 strt r8, [r1], #0x04
616 strt r9, [r1], #0x04
617 subs r2, r2, #0x20
618 strt r4, [r1], #0x04
619 strt r5, [r1], #0x04
620 bge .Lcopyout_w_loop32
621
622 .Lcopyout_w_lessthan32:
623 adds r2, r2, #0x20 /* Adjust for extra sub */
624 ldmfdeq sp!, {r4-r9}
625 RETeq /* Return now if done */
626
627 and r4, r2, #0x18 /* r4 = bytes in whole 8-byte chunks (0/8/16/24) */
628 rsb r5, r4, #0x18 /* r5 = bytes' worth of chunks to skip below */
629 subs r2, r2, r4 /* r2 = tail (0..7); Z is tested by the RETeq below */
630 add pc, pc, r5, lsl #1 /* Each 8-byte chunk below is 4 insns = 16 bytes */
631 nop /* Pad: pc reads 8 bytes past the add */
632
633 /* At least 24 bytes remaining */
634 ldrd r4, [r0], #0x08
635 strt r4, [r1], #0x04
636 strt r5, [r1], #0x04
637 nop /* Keeps the chunk at 4 instructions */
638
639 /* At least 16 bytes remaining */
640 ldrd r4, [r0], #0x08
641 strt r4, [r1], #0x04
642 strt r5, [r1], #0x04
643 nop /* Keeps the chunk at 4 instructions */
644
645 /* At least 8 bytes remaining */
646 ldrd r4, [r0], #0x08
647 strt r4, [r1], #0x04
648 strt r5, [r1], #0x04
649 nop /* Keeps the chunk at 4 instructions */
650
651 /* Less than 8 bytes remaining */
652 ldmfd sp!, {r4-r9}
653 RETeq /* Return now if done */
654 mov r3, #0x00 /* r4-r9 are popped; nothing for the fault path to restore */
655
656 .Lcopyout_w_less_than8:
657 subs r2, r2, #0x04
658 ldrge ip, [r0], #0x04 /* At least 4 bytes: copy one word */
659 strtge ip, [r1], #0x04
660 RETeq /* Return now if done */
661 addlt r2, r2, #0x04 /* Fewer than 4: undo the subtract */
662 ldrb ip, [r0], #0x01 /* 1..3 bytes left: first byte */
663 cmp r2, #0x02 /* Flags now carry the count; r2 is free */
664 ldrbge r2, [r0], #0x01 /* Second byte, if 2 or 3 remain */
665 strbt ip, [r1], #0x01
666 ldrbgt ip, [r0] /* Third byte, if 3 remain */
667 strbtge r2, [r1], #0x01
668 strbtgt ip, [r1]
669 RET
670
671 /*
672 * At this point, it has not been possible to word align both buffers.
673 * The destination buffer (r1) is word aligned, but the source buffer
674 * (r0) is not.
 *
 * ip holds the source misalignment (1..3).  The source pointer is rounded
 * down to a word boundary and whole words are loaded; each destination
 * word is then assembled from the tail of the previous source word
 * (kept in ip) and the head of the next one.  The shift directions used
 * (lsr on the older word, lsl on the newer) correspond to a little-endian
 * byte order.
 * r4-r7 are pushed here and r3 is set to 1 so .Lcopyout_fault pops them.
 * Inside the loops r2 is biased (by -16, then -4) so the sign of the
 * `subs' result drives the loop branches.
675 */
676 .Lcopyout_bad_align:
677 stmfd sp!, {r4-r7}
678 mov r3, #0x01 /* Signal restore r4-r7 */
679 bic r0, r0, #0x03 /* Round the source down to a word boundary */
680 cmp ip, #2 /* Dispatch on the misalignment (flags survive the ldr) */
681 ldr ip, [r0], #0x04 /* Prime ip with the first source word */
682 bgt .Lcopyout_bad3
683 beq .Lcopyout_bad2
684 b .Lcopyout_bad1
685
 /* Source is 1 byte past a word boundary: 3 bytes from ip, 1 from the next word */
686 .Lcopyout_bad1_loop16:
687 mov r4, ip, lsr #8
688 ldr r5, [r0], #0x04
689 pld [r0, #0x018]
690 ldr r6, [r0], #0x04
691 ldr r7, [r0], #0x04
692 ldr ip, [r0], #0x04
693 orr r4, r4, r5, lsl #24
694 mov r5, r5, lsr #8
695 orr r5, r5, r6, lsl #24
696 mov r6, r6, lsr #8
697 orr r6, r6, r7, lsl #24
698 mov r7, r7, lsr #8
699 orr r7, r7, ip, lsl #24
700 strt r4, [r1], #0x04
701 strt r5, [r1], #0x04
702 strt r6, [r1], #0x04
703 strt r7, [r1], #0x04
704 .Lcopyout_bad1:
705 subs r2, r2, #0x10
706 bge .Lcopyout_bad1_loop16
707
708 adds r2, r2, #0x10 /* Undo the extra subtract */
709 ldmfdeq sp!, {r4-r7}
710 RETeq /* Return now if done */
711 subs r2, r2, #0x04
712 sublt r0, r0, #0x03 /* Point r0 back at the first unconsumed byte */
713 blt .Lcopyout_l4
714
715 .Lcopyout_bad1_loop4:
716 mov r4, ip, lsr #8
717 ldr ip, [r0], #0x04
718 subs r2, r2, #0x04
719 orr r4, r4, ip, lsl #24
720 strt r4, [r1], #0x04
721 bge .Lcopyout_bad1_loop4
722 sub r0, r0, #0x03 /* Point r0 back at the first unconsumed byte */
723 b .Lcopyout_l4
724
 /* Source is 2 bytes past a word boundary: 2 bytes from ip, 2 from the next word */
725 .Lcopyout_bad2_loop16:
726 mov r4, ip, lsr #16
727 ldr r5, [r0], #0x04
728 pld [r0, #0x018]
729 ldr r6, [r0], #0x04
730 ldr r7, [r0], #0x04
731 ldr ip, [r0], #0x04
732 orr r4, r4, r5, lsl #16
733 mov r5, r5, lsr #16
734 orr r5, r5, r6, lsl #16
735 mov r6, r6, lsr #16
736 orr r6, r6, r7, lsl #16
737 mov r7, r7, lsr #16
738 orr r7, r7, ip, lsl #16
739 strt r4, [r1], #0x04
740 strt r5, [r1], #0x04
741 strt r6, [r1], #0x04
742 strt r7, [r1], #0x04
743 .Lcopyout_bad2:
744 subs r2, r2, #0x10
745 bge .Lcopyout_bad2_loop16
746
747 adds r2, r2, #0x10 /* Undo the extra subtract */
748 ldmfdeq sp!, {r4-r7}
749 RETeq /* Return now if done */
750 subs r2, r2, #0x04
751 sublt r0, r0, #0x02 /* Point r0 back at the first unconsumed byte */
752 blt .Lcopyout_l4
753
754 .Lcopyout_bad2_loop4:
755 mov r4, ip, lsr #16
756 ldr ip, [r0], #0x04
757 subs r2, r2, #0x04
758 orr r4, r4, ip, lsl #16
759 strt r4, [r1], #0x04
760 bge .Lcopyout_bad2_loop4
761 sub r0, r0, #0x02 /* Point r0 back at the first unconsumed byte */
762 b .Lcopyout_l4
763
 /* Source is 3 bytes past a word boundary: 1 byte from ip, 3 from the next word */
764 .Lcopyout_bad3_loop16:
765 mov r4, ip, lsr #24
766 ldr r5, [r0], #0x04
767 pld [r0, #0x018]
768 ldr r6, [r0], #0x04
769 ldr r7, [r0], #0x04
770 ldr ip, [r0], #0x04
771 orr r4, r4, r5, lsl #8
772 mov r5, r5, lsr #24
773 orr r5, r5, r6, lsl #8
774 mov r6, r6, lsr #24
775 orr r6, r6, r7, lsl #8
776 mov r7, r7, lsr #24
777 orr r7, r7, ip, lsl #8
778 strt r4, [r1], #0x04
779 strt r5, [r1], #0x04
780 strt r6, [r1], #0x04
781 strt r7, [r1], #0x04
782 .Lcopyout_bad3:
783 subs r2, r2, #0x10
784 bge .Lcopyout_bad3_loop16
785
786 adds r2, r2, #0x10 /* Undo the extra subtract */
787 ldmfdeq sp!, {r4-r7}
788 RETeq /* Return now if done */
789 subs r2, r2, #0x04
790 sublt r0, r0, #0x01 /* Point r0 back at the first unconsumed byte */
791 blt .Lcopyout_l4
792
793 .Lcopyout_bad3_loop4:
794 mov r4, ip, lsr #24
795 ldr ip, [r0], #0x04
796 subs r2, r2, #0x04
797 orr r4, r4, ip, lsl #8
798 strt r4, [r1], #0x04
799 bge .Lcopyout_bad3_loop4
800 sub r0, r0, #0x01 /* Point r0 back at the first unconsumed byte */
801
 /* Common tail for the unaligned paths: at most 3 bytes remain */
802 .Lcopyout_l4:
803 ldmfd sp!, {r4-r7}
804 mov r3, #0x00 /* r4-r7 are popped; nothing for the fault path to restore */
805 adds r2, r2, #0x04 /* Undo the last subtract of 4: r2 = 0..3 */
806 RETeq
 /* Also entered directly from .Lcopyout_guts when too few bytes remain to align */
807 .Lcopyout_l4_2:
808 rsbs r2, r2, #0x03 /* r2 = number of byte copies to skip */
809 addne pc, pc, r2, lsl #3 /* Skip r2 load/store pairs (8 bytes each) */
810 nop /* Pad: pc reads 8 bytes past the addne */
811 ldrb ip, [r0], #0x01
812 strbt ip, [r1], #0x01
813 ldrb ip, [r0], #0x01
814 strbt ip, [r1], #0x01
815 ldrb ip, [r0]
816 strbt ip, [r1]
817 RET
818 END(copyout)
819
Cache object: 2fb4a7aec396308e3fba1b9cb9102dfc
|