1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
2
3 /*-
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD: releng/6.2/sys/arm/arm/bcopyinout_xscale.S 146604 2005-05-24 23:55:09Z cognet $");
40
41 .text
42 .align 0
43
44 #ifdef MULTIPROCESSOR
45 .Lcpu_info:
46 .word _C_LABEL(cpu_info)
47 #else
48 .Lcurpcb:
49 .word _C_LABEL(__pcpu) + PC_CURPCB
50 #endif
51
/*
 * copyin
 *
 * r0 = user space address (source)
 * r1 = kernel space address (destination)
 * r2 = length
 *
 * Copies bytes from user space to kernel space.  Every load from the
 * user buffer uses a "t" form (ldrt/ldrbt); stores to the kernel buffer
 * are ordinary stores.
 *
 * Returns 0 in r0 once the copy completes.  Also returns 0 immediately,
 * without touching either buffer, when r2 <= 0 (signed compare).
 *
 * While .Lcopyin_guts runs, PCB_ONFAULT in the current pcb points at
 * .Lcopyin_fault.  That path puts the previous PCB_ONFAULT value back,
 * unwinds whatever the guts pushed and returns to our caller.  It does
 * not write r0 itself, so the value returned on a fault is whatever the
 * abort handler left there (presumably an errno - set outside this file).
 *
 * Register roles around the copy:
 *   r10 = current pcb
 *   r11 = PCB_ONFAULT value found on entry (restored on every exit)
 *   r3  = what the guts currently have pushed, for the fault path:
 *           0  nothing,  > 0  r4-r7,  < 0  r4-r9
 */
ENTRY(copyin)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	stmfd	sp!, {r10-r11, lr}

#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2}		/* Keep our arguments over the call */
	bl	_C_LABEL(cpu_number)
	ldr	r10, .Lcpu_info
	/*
	 * Index cpu_info[] while r0 still holds cpu_number()'s result;
	 * the ldmfd below overwrites r0 with the caller's user address.
	 */
	ldr	r10, [r10, r0, lsl #2]
	ldmfd	sp!, {r0-r2}
	ldr	r10, [r10, #CI_CURPCB]
#else
	ldr	r10, .Lcurpcb
	ldr	r10, [r10]
#endif

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember the old handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put the old handler back */
	mov	r0, #0x00		/* Success */
	ldmfd	sp!, {r10-r11, pc}

	/* Reached through PCB_ONFAULT, not by a branch in this file */
.Lcopyin_fault:
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmgtfd	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmltfd	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1..3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * ip = 3 - bytes needed = number of ldrbt/strb pairs to skip.
	 * pc reads as the address of the first ldrbt (this instruction
	 * + 8, just past the nop) and each pair occupies 8 bytes.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/*
	 * Quad-align the destination buffer.  The conditional user load
	 * is issued before r4-r9 are pushed and before r3 changes, so a
	 * fault on it still sees r3 == 0 with nothing extra on the stack.
	 */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrnet	ip, [r0], #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */
	tst	r1, #0x07		/* XXX: bug work-around */
	subne	r2, r2, #0x04
	strne	ip, [r1], #0x04

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80		/* Pre-decrement; undone at lessthan128 */
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80		/* Flags survive the strd below */
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	RETeq
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * r4 = bytes to move as whole 8-byte groups (0, 8, 16 or 24),
	 * r5 = 0x18 - r4.  Each group below is four instructions (16
	 * bytes), so adding r5 * 2 to pc skips the groups not needed;
	 * pc reads as the address of the first group (just past the nop).
	 * The subs leaves Z set when nothing remains after the groups.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop				/* Counts toward the 4-instruction group */
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 are no longer on the stack */

.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrget	ip, [r0], #0x04		/* Copy one word if >= 4 remain */
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* No word copied: undo the sub */
	/* 1..3 bytes remain: byte 1 always, byte 2 if >= 2, byte 3 if > 2 */
	ldrbt	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrgebt	r2, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrgtbt	ip, [r0]
	strgeb	r2, [r1], #0x01
	strgtb	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * ip holds (r0 & 3) from the test at .Lcopyin_wordaligned and selects
 * the bad1/bad2/bad3 variant.  r0 is rounded down to a word boundary
 * and read a word at a time; ip always carries the most recently
 * loaded source word, whose unused bytes are merged with the next word
 * by shifting.
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2
	ldrt	ip, [r0], #0x04		/* Does not alter the cmp flags */
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

	/* Source is 1 byte past a word boundary: 16 bytes per pass */
.Lcopyin_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the first uncopied byte */
	blt	.Lcopyin_l4

	/* Same merge, one word per pass */
.Lcopyin_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the first uncopied byte */
	b	.Lcopyin_l4

	/* Source is 2 bytes past a word boundary: 16 bytes per pass */
.Lcopyin_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the first uncopied byte */
	blt	.Lcopyin_l4

	/* Same merge, one word per pass */
.Lcopyin_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the first uncopied byte */
	b	.Lcopyin_l4

	/* Source is 3 bytes past a word boundary: 16 bytes per pass */
.Lcopyin_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the first uncopied byte */
	blt	.Lcopyin_l4

	/* Same merge, one word per pass */
.Lcopyin_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the first uncopied byte */

	/* Common tail of the bad-alignment paths: at most 3 bytes left */
.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 are no longer on the stack */
	adds	r2, r2, #0x04		/* Undo the last sub of 4 */
	RETeq
	/*
	 * Copy the final r2 (1..3) bytes.  r2 becomes 3 - r2, the number
	 * of ldrbt/strb pairs to skip; pc reads as the address of the
	 * first ldrbt and each pair occupies 8 bytes.
	 */
.Lcopyin_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET
480
481
/*
 * copyout
 *
 * r0 = kernel space address (source)
 * r1 = user space address (destination)
 * r2 = length
 *
 * Copies bytes from kernel space to user space.  Every store to the
 * user buffer uses a "t" form (strt/strbt); loads from the kernel
 * buffer are ordinary loads.
 *
 * Returns 0 in r0 once the copy completes.  Also returns 0 immediately,
 * without touching either buffer, when r2 <= 0 (signed compare).
 *
 * While .Lcopyout_guts runs, PCB_ONFAULT in the current pcb points at
 * .Lcopyout_fault.  That path puts the previous PCB_ONFAULT value back,
 * unwinds whatever the guts pushed and returns to our caller.  It does
 * not write r0 itself, so the value returned on a fault is whatever the
 * abort handler left there (presumably an errno - set outside this file).
 *
 * Register roles around the copy:
 *   r10 = current pcb
 *   r11 = PCB_ONFAULT value found on entry (restored on every exit)
 *   r3  = what the guts currently have pushed, for the fault path:
 *           0  nothing,  > 0  r4-r7,  < 0  r4-r9
 */
ENTRY(copyout)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	stmfd	sp!, {r10-r11, lr}

#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2}		/* Keep our arguments over the call */
	bl	_C_LABEL(cpu_number)
	ldr	r10, .Lcpu_info
	/*
	 * Index cpu_info[] while r0 still holds cpu_number()'s result;
	 * the ldmfd below overwrites r0 with the caller's kernel address.
	 */
	ldr	r10, [r10, r0, lsl #2]
	ldmfd	sp!, {r0-r2}
	ldr	r10, [r10, #CI_CURPCB]
#else
	ldr	r10, .Lcurpcb
	ldr	r10, [r10]
#endif

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember the old handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put the old handler back */
	mov	r0, #0x00		/* Success */
	ldmfd	sp!, {r10-r11, pc}

	/* Reached through PCB_ONFAULT, not by a branch in this file */
.Lcopyout_fault:
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmgtfd	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmltfd	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1..3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * ip = 3 - bytes needed = number of ldrb/strbt pairs to skip.
	 * pc reads as the address of the first ldrb (this instruction
	 * + 8, just past the nop) and each pair occupies 8 bytes.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/*
	 * Quad-align the destination buffer.  The conditional user store
	 * is issued after r4-r9 are pushed and r3 is set to -1, so a
	 * fault on it makes the fault path pop r4-r9.
	 */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */
	tst	r1, #0x07		/* XXX: bug work-around */
	subne	r2, r2, #0x04
	strnet	ip, [r1], #0x04

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80		/* Pre-decrement; undone at lessthan128 */
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldr	r4, [r0], #0x04		/* LD:00-03 */
	ldr	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r6, [r0], #0x04		/* LD:08-0b */
	ldr	r7, [r0], #0x04		/* LD:0c-0f */
	ldr	r8, [r0], #0x04		/* LD:10-13 */
	ldr	r9, [r0], #0x04		/* LD:14-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldr	r4, [r0], #0x04		/* LD:18-1b */
	ldr	r5, [r0], #0x04		/* LD:1c-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldr	r6, [r0], #0x04		/* LD:20-23 */
	ldr	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldr	r8, [r0], #0x04		/* LD:28-2b */
	ldr	r9, [r0], #0x04		/* LD:2c-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldr	r4, [r0], #0x04		/* LD:30-33 */
	ldr	r5, [r0], #0x04		/* LD:34-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldr	r6, [r0], #0x04		/* LD:38-3b */
	ldr	r7, [r0], #0x04		/* LD:3c-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldr	r8, [r0], #0x04		/* LD:40-43 */
	ldr	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldr	r4, [r0], #0x04		/* LD:48-4b */
	ldr	r5, [r0], #0x04		/* LD:4c-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldr	r6, [r0], #0x04		/* LD:50-53 */
	ldr	r7, [r0], #0x04		/* LD:54-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldr	r8, [r0], #0x04		/* LD:58-5b */
	ldr	r9, [r0], #0x04		/* LD:5c-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldr	r4, [r0], #0x04		/* LD:60-63 */
	ldr	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldr	r6, [r0], #0x04		/* LD:68-6b */
	ldr	r7, [r0], #0x04		/* LD:6c-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldr	r8, [r0], #0x04		/* LD:70-73 */
	ldr	r9, [r0], #0x04		/* LD:74-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldr	r4, [r0], #0x04		/* LD:78-7b */
	ldr	r5, [r0], #0x04		/* LD:7c-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80		/* Flags survive the stores below */
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldr	r4, [r0], #0x04
	ldr	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	r8, [r0], #0x04
	ldr	r9, [r0], #0x04
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldr	r4, [r0], #0x04
	ldr	r5, [r0], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * r4 = bytes to move as whole 8-byte groups (0, 8, 16 or 24),
	 * r5 = 0x18 - r4.  Each group below is four instructions (16
	 * bytes), so adding r5 * 2 to pc skips the groups not needed;
	 * pc reads as the address of the first group (just past the nop).
	 * The subs leaves Z set when nothing remains after the groups.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldr	r4, [r0], #0x04
	ldr	r5, [r0], #0x04
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04

	/* At least 16 bytes remaining */
	ldr	r4, [r0], #0x04
	ldr	r5, [r0], #0x04
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04

	/* At least 8 bytes remaining */
	ldr	r4, [r0], #0x04
	ldr	r5, [r0], #0x04
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 are no longer on the stack */

.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04		/* Copy one word if >= 4 remain */
	strget	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* No word copied: undo the sub */
	/* 1..3 bytes remain: byte 1 always, byte 2 if >= 2, byte 3 if > 2 */
	ldrb	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrgeb	r2, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrgtb	ip, [r0]
	strgebt	r2, [r1], #0x01
	strgtbt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * ip holds (r0 & 3) from the test at .Lcopyout_wordaligned and selects
 * the bad1/bad2/bad3 variant.  r0 is rounded down to a word boundary
 * and read a word at a time; ip always carries the most recently
 * loaded source word, whose unused bytes are merged with the next word
 * by shifting.
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2
	ldr	ip, [r0], #0x04		/* Does not alter the cmp flags */
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

	/* Source is 1 byte past a word boundary: 16 bytes per pass */
.Lcopyout_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the first uncopied byte */
	blt	.Lcopyout_l4

	/* Same merge, one word per pass */
.Lcopyout_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the first uncopied byte */
	b	.Lcopyout_l4

	/* Source is 2 bytes past a word boundary: 16 bytes per pass */
.Lcopyout_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the first uncopied byte */
	blt	.Lcopyout_l4

	/* Same merge, one word per pass */
.Lcopyout_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the first uncopied byte */
	b	.Lcopyout_l4

	/* Source is 3 bytes past a word boundary: 16 bytes per pass */
.Lcopyout_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the first uncopied byte */
	blt	.Lcopyout_l4

	/* Same merge, one word per pass */
.Lcopyout_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the first uncopied byte */

	/* Common tail of the bad-alignment paths: at most 3 bytes left */
.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 are no longer on the stack */
	adds	r2, r2, #0x04		/* Undo the last sub of 4 */
	RETeq
	/*
	 * Copy the final r2 (1..3) bytes.  r2 becomes 3 - r2, the number
	 * of ldrb/strbt pairs to skip; pc reads as the address of the
	 * first ldrb and each pair occupies 8 bytes.
	 */
.Lcopyout_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET
/* Cache object: 59405f601df3ea156e95a3091562c21a -- appears to be code-browser footer text, not assembler input; kept only as a comment */