1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
2
3 /*-
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD: releng/9.1/sys/arm/arm/bcopyinout_xscale.S 153273 2005-12-09 15:31:02Z cognet $");
40
41 .text
42 .align 0
43
/*
 * Literal pool used by copyin and copyout to locate the current PCB.
 *
 * MULTIPROCESSOR: .Lcpu_info holds the address of cpu_info, which the
 * routines use as the base of a word-indexed table lookup before reading
 * the CI_CURPCB field of the selected entry.
 * Otherwise: .Lcurpcb holds the address of the PC_CURPCB field inside
 * __pcpu; the routines load the PCB pointer from that address.
 */
44 #ifdef MULTIPROCESSOR
45 .Lcpu_info:
46 .word _C_LABEL(cpu_info)
47 #else
48 .Lcurpcb:
49 .word _C_LABEL(__pcpu) + PC_CURPCB
50 #endif
51
/*
 * copyin -- copy bytes from user space to kernel space.
 *
 * In:	r0 = user space address (source)
 *	r1 = kernel space address (destination)
 *	r2 = length in bytes (tested as a signed value)
 * Out:	r0 = 0 if length <= 0 or the copy completed.  If an access faults,
 *	control resumes at .Lcopyin_fault, which unwinds the stack and
 *	returns to the caller without writing r0; presumably the abort
 *	handler has already placed the error code there (TODO confirm
 *	against the fault handler).
 *
 * While the copy runs, r10 holds the current PCB pointer and r11 the
 * previous PCB_ONFAULT value; r3 tells .Lcopyin_fault which extra
 * registers .Lcopyin_guts currently has pushed (0 = none, > 0 = r4-r7,
 * < 0 = r4-r9).
 *
 * .L_arm_memcpy and .L_min_memcpy_size are literal-pool labels that are
 * not defined in this part of the file.
 */
ENTRY(copyin)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional accelerated copy: taken only when the function pointer
	 * reached through .L_arm_memcpy is non-zero and the length is at
	 * least the value reached through .L_min_memcpy_size.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}	/* Keep the arguments for a fallback */
	mov	r3, r0			/* Swap r0/r1: r0 = kernel dst, r1 = user src */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2			/* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return address is the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0			/* Zero result: the copy has been done */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq
	/* Non-zero result: fall through with the original arguments */

.Lnormal:
	stmfd	sp!, {r10-r11, lr}

#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2}
	bl	_C_LABEL(cpu_number)
	ldr	r10, .Lcpu_info
	/*
	 * Index the cpu_info table while r0 still holds the value returned
	 * by cpu_number; the ldmfd below reloads r0 with the caller's user
	 * address, which must not be used as the index.
	 */
	ldr	r10, [r10, r0, lsl #2]
	ldmfd	sp!, {r0-r2}
	ldr	r10, [r10, #CI_CURPCB]
#else
	ldr	r10, .Lcurpcb
	ldr	r10, [r10]
#endif

	mov	r3, #0x00		/* No extra registers pushed yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember the previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put the previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Fault path: restore the previous handler, pop whatever
	 * .Lcopyin_guts had pushed (selected by the sign of r3), then
	 * return to copyin's caller.
	 */
.Lcopyin_fault:
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmgtfd	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmltfd	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}
116
/*
 * .Lcopyin_guts: the copy loops for copyin.  Entered with bl from copyin
 * and left with RET/RETeq.
 *
 *	r0 = user source address (read with ldrt/ldrbt)
 *	r1 = kernel destination address
 *	r2 = bytes remaining (> 0 on entry; copyin bails out otherwise)
 *	r3 = tells .Lcopyin_fault what is pushed on the stack:
 *	     0 = nothing, > 0 = r4-r7, < 0 = r4-r9
 *	ip = scratch
 *
 * The computed jumps (add pc, pc, ...) rely on the exact number of
 * instructions that follow them; do not insert or remove instructions
 * in those sequences.
 */
117 .Lcopyin_guts:
118 pld [r0]
119 /* Word-align the destination buffer */
120 ands ip, r1, #0x03 /* Already word aligned? */
121 beq .Lcopyin_wordaligned /* Yup */
122 rsb ip, ip, #0x04 /* ip = bytes needed to align r1 (1-3) */
123 cmp r2, ip /* Enough bytes left to align it? */
124 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
125 sub r2, r2, ip
126 rsbs ip, ip, #0x03 /* ip = byte copies to skip (0-2) */
127 addne pc, pc, ip, lsl #3 /* Each ldrbt/strb pair is 8 bytes */
128 nop /* Reached only when nothing is skipped */
129 ldrbt ip, [r0], #0x01
130 strb ip, [r1], #0x01
131 ldrbt ip, [r0], #0x01
132 strb ip, [r1], #0x01
133 ldrbt ip, [r0], #0x01
134 strb ip, [r1], #0x01
135 cmp r2, #0x00 /* All done? */
136 RETeq
137
138 /* Destination buffer is now word aligned */
139 .Lcopyin_wordaligned:
140 ands ip, r0, #0x03 /* Is src also word-aligned? */
141 bne .Lcopyin_bad_align /* Nope. Things just got bad */
142 cmp r2, #0x08 /* Less than 8 bytes remaining? */
143 blt .Lcopyin_w_less_than8
144
145 /* Quad-align the destination buffer */
146 tst r1, #0x07 /* Already quad aligned? */
147 ldrnet ip, [r0], #0x04
148 strne ip, [r1], #0x04
149 subne r2, r2, #0x04
150 stmfd sp!, {r4-r9} /* Free up some registers */
151 mov r3, #-1 /* Signal restore r4-r9 */
152
153 /* Destination buffer quad aligned, source is word aligned */
154 subs r2, r2, #0x80
155 blt .Lcopyin_w_lessthan128
156
157 /* Copy 128 bytes at a time */
/* The LD:/ST: tags give the byte offsets within the 128-byte block */
158 .Lcopyin_w_loop128:
159 ldrt r4, [r0], #0x04 /* LD:00-03 */
160 ldrt r5, [r0], #0x04 /* LD:04-07 */
161 pld [r0, #0x18] /* Prefetch 0x20 */
162 ldrt r6, [r0], #0x04 /* LD:08-0b */
163 ldrt r7, [r0], #0x04 /* LD:0c-0f */
164 ldrt r8, [r0], #0x04 /* LD:10-13 */
165 ldrt r9, [r0], #0x04 /* LD:14-17 */
166 strd r4, [r1], #0x08 /* ST:00-07 */
167 ldrt r4, [r0], #0x04 /* LD:18-1b */
168 ldrt r5, [r0], #0x04 /* LD:1c-1f */
169 strd r6, [r1], #0x08 /* ST:08-0f */
170 ldrt r6, [r0], #0x04 /* LD:20-23 */
171 ldrt r7, [r0], #0x04 /* LD:24-27 */
172 pld [r0, #0x18] /* Prefetch 0x40 */
173 strd r8, [r1], #0x08 /* ST:10-17 */
174 ldrt r8, [r0], #0x04 /* LD:28-2b */
175 ldrt r9, [r0], #0x04 /* LD:2c-2f */
176 strd r4, [r1], #0x08 /* ST:18-1f */
177 ldrt r4, [r0], #0x04 /* LD:30-33 */
178 ldrt r5, [r0], #0x04 /* LD:34-37 */
179 strd r6, [r1], #0x08 /* ST:20-27 */
180 ldrt r6, [r0], #0x04 /* LD:38-3b */
181 ldrt r7, [r0], #0x04 /* LD:3c-3f */
182 strd r8, [r1], #0x08 /* ST:28-2f */
183 ldrt r8, [r0], #0x04 /* LD:40-43 */
184 ldrt r9, [r0], #0x04 /* LD:44-47 */
185 pld [r0, #0x18] /* Prefetch 0x60 */
186 strd r4, [r1], #0x08 /* ST:30-37 */
187 ldrt r4, [r0], #0x04 /* LD:48-4b */
188 ldrt r5, [r0], #0x04 /* LD:4c-4f */
189 strd r6, [r1], #0x08 /* ST:38-3f */
190 ldrt r6, [r0], #0x04 /* LD:50-53 */
191 ldrt r7, [r0], #0x04 /* LD:54-57 */
192 strd r8, [r1], #0x08 /* ST:40-47 */
193 ldrt r8, [r0], #0x04 /* LD:58-5b */
194 ldrt r9, [r0], #0x04 /* LD:5c-5f */
195 strd r4, [r1], #0x08 /* ST:48-4f */
196 ldrt r4, [r0], #0x04 /* LD:60-63 */
197 ldrt r5, [r0], #0x04 /* LD:64-67 */
198 pld [r0, #0x18] /* Prefetch 0x80 */
199 strd r6, [r1], #0x08 /* ST:50-57 */
200 ldrt r6, [r0], #0x04 /* LD:68-6b */
201 ldrt r7, [r0], #0x04 /* LD:6c-6f */
202 strd r8, [r1], #0x08 /* ST:58-5f */
203 ldrt r8, [r0], #0x04 /* LD:70-73 */
204 ldrt r9, [r0], #0x04 /* LD:74-77 */
205 strd r4, [r1], #0x08 /* ST:60-67 */
206 ldrt r4, [r0], #0x04 /* LD:78-7b */
207 ldrt r5, [r0], #0x04 /* LD:7c-7f */
208 strd r6, [r1], #0x08 /* ST:68-6f */
209 strd r8, [r1], #0x08 /* ST:70-77 */
210 subs r2, r2, #0x80
211 strd r4, [r1], #0x08 /* ST:78-7f */
212 bge .Lcopyin_w_loop128
213
214 .Lcopyin_w_lessthan128:
215 adds r2, r2, #0x80 /* Adjust for extra sub */
216 ldmeqfd sp!, {r4-r9}
217 RETeq
218 subs r2, r2, #0x20
219 blt .Lcopyin_w_lessthan32
220
221 /* Copy 32 bytes at a time */
222 .Lcopyin_w_loop32:
223 ldrt r4, [r0], #0x04
224 ldrt r5, [r0], #0x04
225 pld [r0, #0x18]
226 ldrt r6, [r0], #0x04
227 ldrt r7, [r0], #0x04
228 ldrt r8, [r0], #0x04
229 ldrt r9, [r0], #0x04
230 strd r4, [r1], #0x08
231 ldrt r4, [r0], #0x04
232 ldrt r5, [r0], #0x04
233 strd r6, [r1], #0x08
234 strd r8, [r1], #0x08
235 subs r2, r2, #0x20
236 strd r4, [r1], #0x08
237 bge .Lcopyin_w_loop32
238
239 .Lcopyin_w_lessthan32:
240 adds r2, r2, #0x20 /* Adjust for extra sub */
241 ldmeqfd sp!, {r4-r9}
242 RETeq /* Return now if done */
243
/*
 * Computed jump into the three 8-byte copy groups below.  r4 is the
 * number of bytes they will copy (0, 8, 16 or 24) and r5 = 0x18 - r4.
 * Each group is four instructions (16 bytes), hence the offset r5 * 2;
 * the nop inside each group pads it to that size.  The subs leaves Z set
 * when nothing remains afterwards, and no instruction before the RETeq
 * at the end of the groups changes the flags.
 */
244 and r4, r2, #0x18
245 rsb r5, r4, #0x18
246 subs r2, r2, r4
247 add pc, pc, r5, lsl #1
248 nop
249
250 /* At least 24 bytes remaining */
251 ldrt r4, [r0], #0x04
252 ldrt r5, [r0], #0x04
253 nop
254 strd r4, [r1], #0x08
255
256 /* At least 16 bytes remaining */
257 ldrt r4, [r0], #0x04
258 ldrt r5, [r0], #0x04
259 nop
260 strd r4, [r1], #0x08
261
262 /* At least 8 bytes remaining */
263 ldrt r4, [r0], #0x04
264 ldrt r5, [r0], #0x04
265 nop
266 strd r4, [r1], #0x08
267
268 /* Less than 8 bytes remaining */
269 ldmfd sp!, {r4-r9}
270 RETeq /* Return now if done */
271 mov r3, #0x00 /* r4-r9 are no longer on the stack */
272
/* Tail of the aligned path: r2 is below 8 here, nothing extra is pushed */
273 .Lcopyin_w_less_than8:
274 subs r2, r2, #0x04
275 ldrget ip, [r0], #0x04
276 strge ip, [r1], #0x04
277 RETeq /* Return now if done */
278 addlt r2, r2, #0x04 /* No word copied: undo the subs */
/* 1-3 bytes left; r2 is reused as a data register after the cmp */
279 ldrbt ip, [r0], #0x01
280 cmp r2, #0x02
281 ldrgebt r2, [r0], #0x01
282 strb ip, [r1], #0x01
283 ldrgtbt ip, [r0]
284 strgeb r2, [r1], #0x01
285 strgtb ip, [r1]
286 RET
287
288 /*
289 * At this point, it has not been possible to word align both buffers.
290 * The destination buffer (r1) is word aligned, but the source buffer
291 * (r0) is not.
292 */
/*
 * ip still holds r0 & 3 from the ands at .Lcopyin_wordaligned.  r0 is
 * rounded down to a word boundary and the first word is preloaded into
 * ip; the flags from the cmp survive the ldrt and select the loop for a
 * source misalignment of 1, 2 or 3 bytes.  In those loops ip always
 * carries the most recently loaded source word, and each stored word is
 * assembled from the unused bytes of that word and the leading bytes of
 * the next one.
 */
293 .Lcopyin_bad_align:
294 stmfd sp!, {r4-r7}
295 mov r3, #0x01 /* Signal restore r4-r7 */
296 bic r0, r0, #0x03
297 cmp ip, #2
298 ldrt ip, [r0], #0x04
299 bgt .Lcopyin_bad3
300 beq .Lcopyin_bad2
301 b .Lcopyin_bad1
302
303 .Lcopyin_bad1_loop16:
304 #ifdef __ARMEB__
305 mov r4, ip, lsl #8
306 #else
307 mov r4, ip, lsr #8
308 #endif
309 ldrt r5, [r0], #0x04
310 pld [r0, #0x018]
311 ldrt r6, [r0], #0x04
312 ldrt r7, [r0], #0x04
313 ldrt ip, [r0], #0x04
314 #ifdef __ARMEB__
315 orr r4, r4, r5, lsr #24
316 mov r5, r5, lsl #8
317 orr r5, r5, r6, lsr #24
318 mov r6, r6, lsl #8
319 orr r6, r6, r7, lsr #24
320 mov r7, r7, lsl #8
321 orr r7, r7, ip, lsr #24
322 #else
323 orr r4, r4, r5, lsl #24
324 mov r5, r5, lsr #8
325 orr r5, r5, r6, lsl #24
326 mov r6, r6, lsr #8
327 orr r6, r6, r7, lsl #24
328 mov r7, r7, lsr #8
329 orr r7, r7, ip, lsl #24
330 #endif
331 str r4, [r1], #0x04
332 str r5, [r1], #0x04
333 str r6, [r1], #0x04
334 str r7, [r1], #0x04
335 .Lcopyin_bad1:
336 subs r2, r2, #0x10
337 bge .Lcopyin_bad1_loop16
338
339 adds r2, r2, #0x10
340 ldmeqfd sp!, {r4-r7}
341 RETeq /* Return now if done */
342 subs r2, r2, #0x04
343 sublt r0, r0, #0x03 /* Back r0 up to the first uncopied byte */
344 blt .Lcopyin_l4
345
346 .Lcopyin_bad1_loop4:
347 #ifdef __ARMEB__
348 mov r4, ip, lsl #8
349 #else
350 mov r4, ip, lsr #8
351 #endif
352 ldrt ip, [r0], #0x04
353 subs r2, r2, #0x04
354 #ifdef __ARMEB__
355 orr r4, r4, ip, lsr #24
356 #else
357 orr r4, r4, ip, lsl #24
358 #endif
359 str r4, [r1], #0x04
360 bge .Lcopyin_bad1_loop4
361 sub r0, r0, #0x03 /* Back r0 up to the first uncopied byte */
362 b .Lcopyin_l4
363
364 .Lcopyin_bad2_loop16:
365 #ifdef __ARMEB__
366 mov r4, ip, lsl #16
367 #else
368 mov r4, ip, lsr #16
369 #endif
370 ldrt r5, [r0], #0x04
371 pld [r0, #0x018]
372 ldrt r6, [r0], #0x04
373 ldrt r7, [r0], #0x04
374 ldrt ip, [r0], #0x04
375 #ifdef __ARMEB__
376 orr r4, r4, r5, lsr #16
377 mov r5, r5, lsl #16
378 orr r5, r5, r6, lsr #16
379 mov r6, r6, lsl #16
380 orr r6, r6, r7, lsr #16
381 mov r7, r7, lsl #16
382 orr r7, r7, ip, lsr #16
383 #else
384 orr r4, r4, r5, lsl #16
385 mov r5, r5, lsr #16
386 orr r5, r5, r6, lsl #16
387 mov r6, r6, lsr #16
388 orr r6, r6, r7, lsl #16
389 mov r7, r7, lsr #16
390 orr r7, r7, ip, lsl #16
391 #endif
392 str r4, [r1], #0x04
393 str r5, [r1], #0x04
394 str r6, [r1], #0x04
395 str r7, [r1], #0x04
396 .Lcopyin_bad2:
397 subs r2, r2, #0x10
398 bge .Lcopyin_bad2_loop16
399
400 adds r2, r2, #0x10
401 ldmeqfd sp!, {r4-r7}
402 RETeq /* Return now if done */
403 subs r2, r2, #0x04
404 sublt r0, r0, #0x02 /* Back r0 up to the first uncopied byte */
405 blt .Lcopyin_l4
406
407 .Lcopyin_bad2_loop4:
408 #ifdef __ARMEB__
409 mov r4, ip, lsl #16
410 #else
411 mov r4, ip, lsr #16
412 #endif
413 ldrt ip, [r0], #0x04
414 subs r2, r2, #0x04
415 #ifdef __ARMEB__
416 orr r4, r4, ip, lsr #16
417 #else
418 orr r4, r4, ip, lsl #16
419 #endif
420 str r4, [r1], #0x04
421 bge .Lcopyin_bad2_loop4
422 sub r0, r0, #0x02 /* Back r0 up to the first uncopied byte */
423 b .Lcopyin_l4
424
425 .Lcopyin_bad3_loop16:
426 #ifdef __ARMEB__
427 mov r4, ip, lsl #24
428 #else
429 mov r4, ip, lsr #24
430 #endif
431 ldrt r5, [r0], #0x04
432 pld [r0, #0x018]
433 ldrt r6, [r0], #0x04
434 ldrt r7, [r0], #0x04
435 ldrt ip, [r0], #0x04
436 #ifdef __ARMEB__
437 orr r4, r4, r5, lsr #8
438 mov r5, r5, lsl #24
439 orr r5, r5, r6, lsr #8
440 mov r6, r6, lsl #24
441 orr r6, r6, r7, lsr #8
442 mov r7, r7, lsl #24
443 orr r7, r7, ip, lsr #8
444 #else
445 orr r4, r4, r5, lsl #8
446 mov r5, r5, lsr #24
447 orr r5, r5, r6, lsl #8
448 mov r6, r6, lsr #24
449 orr r6, r6, r7, lsl #8
450 mov r7, r7, lsr #24
451 orr r7, r7, ip, lsl #8
452 #endif
453 str r4, [r1], #0x04
454 str r5, [r1], #0x04
455 str r6, [r1], #0x04
456 str r7, [r1], #0x04
457 .Lcopyin_bad3:
458 subs r2, r2, #0x10
459 bge .Lcopyin_bad3_loop16
460
461 adds r2, r2, #0x10
462 ldmeqfd sp!, {r4-r7}
463 RETeq /* Return now if done */
464 subs r2, r2, #0x04
465 sublt r0, r0, #0x01 /* Back r0 up to the first uncopied byte */
466 blt .Lcopyin_l4
467
468 .Lcopyin_bad3_loop4:
469 #ifdef __ARMEB__
470 mov r4, ip, lsl #24
471 #else
472 mov r4, ip, lsr #24
473 #endif
474 ldrt ip, [r0], #0x04
475 subs r2, r2, #0x04
476 #ifdef __ARMEB__
477 orr r4, r4, ip, lsr #8
478 #else
479 orr r4, r4, ip, lsl #8
480 #endif
481 str r4, [r1], #0x04
482 bge .Lcopyin_bad3_loop4
483 sub r0, r0, #0x01 /* Back r0 up to the first uncopied byte */
484
/* Common exit of the misaligned loops: r2 is (bytes left - 4) here */
485 .Lcopyin_l4:
486 ldmfd sp!, {r4-r7}
487 mov r3, #0x00 /* r4-r7 are no longer on the stack */
488 adds r2, r2, #0x04
489 RETeq
/*
 * Bytewise tail for 1-3 remaining bytes: skip (3 - r2) of the
 * load/store pairs below, each pair being 8 bytes of code.
 */
490 .Lcopyin_l4_2:
491 rsbs r2, r2, #0x03
492 addne pc, pc, r2, lsl #3
493 nop
494 ldrbt ip, [r0], #0x01
495 strb ip, [r1], #0x01
496 ldrbt ip, [r0], #0x01
497 strb ip, [r1], #0x01
498 ldrbt ip, [r0]
499 strb ip, [r1]
500 RET
501
502
/*
 * copyout -- copy bytes from kernel space to user space.
 *
 * In:	r0 = kernel space address (source)
 *	r1 = user space address (destination)
 *	r2 = length in bytes (tested as a signed value)
 * Out:	r0 = 0 if length <= 0 or the copy completed.  If an access faults,
 *	control resumes at .Lcopyout_fault, which unwinds the stack and
 *	returns to the caller without writing r0; presumably the abort
 *	handler has already placed the error code there (TODO confirm
 *	against the fault handler).
 *
 * While the copy runs, r10 holds the current PCB pointer and r11 the
 * previous PCB_ONFAULT value; r3 tells .Lcopyout_fault which extra
 * registers .Lcopyout_guts currently has pushed (0 = none, > 0 = r4-r7,
 * < 0 = r4-r9).
 *
 * .L_arm_memcpy and .L_min_memcpy_size are literal-pool labels that are
 * not defined in this part of the file.
 */
ENTRY(copyout)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional accelerated copy: taken only when the function pointer
	 * reached through .L_arm_memcpy is non-zero and the length is at
	 * least the value reached through .L_min_memcpy_size.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	stmfd	sp!, {r0-r2, r4, lr}	/* Keep the arguments for a fallback */
	mov	r3, r0			/* Swap r0/r1: r0 = user dst, r1 = kernel src */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1			/* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return address is the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0			/* Zero result: the copy has been done */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq
	/* Non-zero result: fall through with the original arguments */

.Lnormale:
	stmfd	sp!, {r10-r11, lr}

#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2}
	bl	_C_LABEL(cpu_number)
	ldr	r10, .Lcpu_info
	/*
	 * Index the cpu_info table while r0 still holds the value returned
	 * by cpu_number; the ldmfd below reloads r0 with the caller's
	 * kernel address, which must not be used as the index.
	 */
	ldr	r10, [r10, r0, lsl #2]
	ldmfd	sp!, {r0-r2}
	ldr	r10, [r10, #CI_CURPCB]
#else
	ldr	r10, .Lcurpcb
	ldr	r10, [r10]
#endif

	mov	r3, #0x00		/* No extra registers pushed yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember the previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put the previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Fault path: restore the previous handler, pop whatever
	 * .Lcopyout_guts had pushed (selected by the sign of r3), then
	 * return to copyout's caller.
	 */
.Lcopyout_fault:
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmgtfd	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmltfd	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}
567
/*
 * .Lcopyout_guts: the copy loops for copyout.  Entered with bl from
 * copyout and left with RET/RETeq.
 *
 *	r0 = kernel source address
 *	r1 = user destination address (written with strt/strbt)
 *	r2 = bytes remaining (> 0 on entry; copyout bails out otherwise)
 *	r3 = tells .Lcopyout_fault what is pushed on the stack:
 *	     0 = nothing, > 0 = r4-r7, < 0 = r4-r9
 *	ip = scratch
 *
 * The computed jumps (add pc, pc, ...) rely on the exact number of
 * instructions that follow them; do not insert or remove instructions
 * in those sequences.
 */
568 .Lcopyout_guts:
569 pld [r0]
570 /* Word-align the destination buffer */
571 ands ip, r1, #0x03 /* Already word aligned? */
572 beq .Lcopyout_wordaligned /* Yup */
573 rsb ip, ip, #0x04 /* ip = bytes needed to align r1 (1-3) */
574 cmp r2, ip /* Enough bytes left to align it? */
575 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
576 sub r2, r2, ip
577 rsbs ip, ip, #0x03 /* ip = byte copies to skip (0-2) */
578 addne pc, pc, ip, lsl #3 /* Each ldrb/strbt pair is 8 bytes */
579 nop /* Reached only when nothing is skipped */
580 ldrb ip, [r0], #0x01
581 strbt ip, [r1], #0x01
582 ldrb ip, [r0], #0x01
583 strbt ip, [r1], #0x01
584 ldrb ip, [r0], #0x01
585 strbt ip, [r1], #0x01
586 cmp r2, #0x00 /* All done? */
587 RETeq
588
589 /* Destination buffer is now word aligned */
590 .Lcopyout_wordaligned:
591 ands ip, r0, #0x03 /* Is src also word-aligned? */
592 bne .Lcopyout_bad_align /* Nope. Things just got bad */
593 cmp r2, #0x08 /* Less than 8 bytes remaining? */
594 blt .Lcopyout_w_less_than8
595
596 /* Quad-align the source buffer (r0), so the ldrd loads below start on an 8-byte boundary */
597 tst r0, #0x07 /* Already quad aligned? */
598 ldrne ip, [r0], #0x04
599 subne r2, r2, #0x04
600 strnet ip, [r1], #0x04
601
602 stmfd sp!, {r4-r9} /* Free up some registers */
603 mov r3, #-1 /* Signal restore r4-r9 */
604
605 /* Destination buffer word aligned, source is quad aligned */
606 subs r2, r2, #0x80
607 blt .Lcopyout_w_lessthan128
608
609 /* Copy 128 bytes at a time */
/* The LD:/ST: tags give the byte offsets within the 128-byte block */
610 .Lcopyout_w_loop128:
611 ldrd r4, [r0], #0x08 /* LD:00-07 */
612 pld [r0, #0x18] /* Prefetch 0x20 */
613 ldrd r6, [r0], #0x08 /* LD:08-0f */
614 ldrd r8, [r0], #0x08 /* LD:10-17 */
615 strt r4, [r1], #0x04 /* ST:00-03 */
616 strt r5, [r1], #0x04 /* ST:04-07 */
617 ldrd r4, [r0], #0x08 /* LD:18-1f */
618 strt r6, [r1], #0x04 /* ST:08-0b */
619 strt r7, [r1], #0x04 /* ST:0c-0f */
620 ldrd r6, [r0], #0x08 /* LD:20-27 */
621 pld [r0, #0x18] /* Prefetch 0x40 */
622 strt r8, [r1], #0x04 /* ST:10-13 */
623 strt r9, [r1], #0x04 /* ST:14-17 */
624 ldrd r8, [r0], #0x08 /* LD:28-2f */
625 strt r4, [r1], #0x04 /* ST:18-1b */
626 strt r5, [r1], #0x04 /* ST:1c-1f */
627 ldrd r4, [r0], #0x08 /* LD:30-37 */
628 strt r6, [r1], #0x04 /* ST:20-23 */
629 strt r7, [r1], #0x04 /* ST:24-27 */
630 ldrd r6, [r0], #0x08 /* LD:38-3f */
631 strt r8, [r1], #0x04 /* ST:28-2b */
632 strt r9, [r1], #0x04 /* ST:2c-2f */
633 ldrd r8, [r0], #0x08 /* LD:40-47 */
634 pld [r0, #0x18] /* Prefetch 0x60 */
635 strt r4, [r1], #0x04 /* ST:30-33 */
636 strt r5, [r1], #0x04 /* ST:34-37 */
637 ldrd r4, [r0], #0x08 /* LD:48-4f */
638 strt r6, [r1], #0x04 /* ST:38-3b */
639 strt r7, [r1], #0x04 /* ST:3c-3f */
640 ldrd r6, [r0], #0x08 /* LD:50-57 */
641 strt r8, [r1], #0x04 /* ST:40-43 */
642 strt r9, [r1], #0x04 /* ST:44-47 */
643 ldrd r8, [r0], #0x08 /* LD:58-5f */
644 strt r4, [r1], #0x04 /* ST:48-4b */
645 strt r5, [r1], #0x04 /* ST:4c-4f */
646 ldrd r4, [r0], #0x08 /* LD:60-67 */
647 pld [r0, #0x18] /* Prefetch 0x80 */
648 strt r6, [r1], #0x04 /* ST:50-53 */
649 strt r7, [r1], #0x04 /* ST:54-57 */
650 ldrd r6, [r0], #0x08 /* LD:68-6f */
651 strt r8, [r1], #0x04 /* ST:58-5b */
652 strt r9, [r1], #0x04 /* ST:5c-5f */
653 ldrd r8, [r0], #0x08 /* LD:70-77 */
654 strt r4, [r1], #0x04 /* ST:60-63 */
655 strt r5, [r1], #0x04 /* ST:64-67 */
656 ldrd r4, [r0], #0x08 /* LD:78-7f */
657 strt r6, [r1], #0x04 /* ST:68-6b */
658 strt r7, [r1], #0x04 /* ST:6c-6f */
659 strt r8, [r1], #0x04 /* ST:70-73 */
660 strt r9, [r1], #0x04 /* ST:74-77 */
661 subs r2, r2, #0x80
662 strt r4, [r1], #0x04 /* ST:78-7b */
663 strt r5, [r1], #0x04 /* ST:7c-7f */
664 bge .Lcopyout_w_loop128
665
666 .Lcopyout_w_lessthan128:
667 adds r2, r2, #0x80 /* Adjust for extra sub */
668 ldmeqfd sp!, {r4-r9}
669 RETeq /* Return now if done */
670 subs r2, r2, #0x20
671 blt .Lcopyout_w_lessthan32
672
673 /* Copy 32 bytes at a time */
674 .Lcopyout_w_loop32:
675 ldrd r4, [r0], #0x08
676 pld [r0, #0x18]
677 ldrd r6, [r0], #0x08
678 ldrd r8, [r0], #0x08
679 strt r4, [r1], #0x04
680 strt r5, [r1], #0x04
681 ldrd r4, [r0], #0x08
682 strt r6, [r1], #0x04
683 strt r7, [r1], #0x04
684 strt r8, [r1], #0x04
685 strt r9, [r1], #0x04
686 subs r2, r2, #0x20
687 strt r4, [r1], #0x04
688 strt r5, [r1], #0x04
689 bge .Lcopyout_w_loop32
690
691 .Lcopyout_w_lessthan32:
692 adds r2, r2, #0x20 /* Adjust for extra sub */
693 ldmeqfd sp!, {r4-r9}
694 RETeq /* Return now if done */
695
/*
 * Computed jump into the three 8-byte copy groups below.  r4 is the
 * number of bytes they will copy (0, 8, 16 or 24) and r5 = 0x18 - r4.
 * Each group is four instructions (16 bytes), hence the offset r5 * 2;
 * the nop at the end of each group pads it to that size.  The subs
 * leaves Z set when nothing remains afterwards, and no instruction
 * before the RETeq at the end of the groups changes the flags.
 */
696 and r4, r2, #0x18
697 rsb r5, r4, #0x18
698 subs r2, r2, r4
699 add pc, pc, r5, lsl #1
700 nop
701
702 /* At least 24 bytes remaining */
703 ldrd r4, [r0], #0x08
704 strt r4, [r1], #0x04
705 strt r5, [r1], #0x04
706 nop
707
708 /* At least 16 bytes remaining */
709 ldrd r4, [r0], #0x08
710 strt r4, [r1], #0x04
711 strt r5, [r1], #0x04
712 nop
713
714 /* At least 8 bytes remaining */
715 ldrd r4, [r0], #0x08
716 strt r4, [r1], #0x04
717 strt r5, [r1], #0x04
718 nop
719
720 /* Less than 8 bytes remaining */
721 ldmfd sp!, {r4-r9}
722 RETeq /* Return now if done */
723 mov r3, #0x00 /* r4-r9 are no longer on the stack */
724
/* Tail of the aligned path: r2 is below 8 here, nothing extra is pushed */
725 .Lcopyout_w_less_than8:
726 subs r2, r2, #0x04
727 ldrge ip, [r0], #0x04
728 strget ip, [r1], #0x04
729 RETeq /* Return now if done */
730 addlt r2, r2, #0x04 /* No word copied: undo the subs */
/* 1-3 bytes left; r2 is reused as a data register after the cmp */
731 ldrb ip, [r0], #0x01
732 cmp r2, #0x02
733 ldrgeb r2, [r0], #0x01
734 strbt ip, [r1], #0x01
735 ldrgtb ip, [r0]
736 strgebt r2, [r1], #0x01
737 strgtbt ip, [r1]
738 RET
739
740 /*
741 * At this point, it has not been possible to word align both buffers.
742 * The destination buffer (r1) is word aligned, but the source buffer
743 * (r0) is not.
744 */
/*
 * ip still holds r0 & 3 from the ands at .Lcopyout_wordaligned.  r0 is
 * rounded down to a word boundary and the first word is preloaded into
 * ip; the flags from the cmp survive the ldr and select the loop for a
 * source misalignment of 1, 2 or 3 bytes.  In those loops ip always
 * carries the most recently loaded source word, and each stored word is
 * assembled from the unused bytes of that word and the leading bytes of
 * the next one.
 */
745 .Lcopyout_bad_align:
746 stmfd sp!, {r4-r7}
747 mov r3, #0x01 /* Signal restore r4-r7 */
748 bic r0, r0, #0x03
749 cmp ip, #2
750 ldr ip, [r0], #0x04
751 bgt .Lcopyout_bad3
752 beq .Lcopyout_bad2
753 b .Lcopyout_bad1
754
755 .Lcopyout_bad1_loop16:
756 #ifdef __ARMEB__
757 mov r4, ip, lsl #8
758 #else
759 mov r4, ip, lsr #8
760 #endif
761 ldr r5, [r0], #0x04
762 pld [r0, #0x018]
763 ldr r6, [r0], #0x04
764 ldr r7, [r0], #0x04
765 ldr ip, [r0], #0x04
766 #ifdef __ARMEB__
767 orr r4, r4, r5, lsr #24
768 mov r5, r5, lsl #8
769 orr r5, r5, r6, lsr #24
770 mov r6, r6, lsl #8
771 orr r6, r6, r7, lsr #24
772 mov r7, r7, lsl #8
773 orr r7, r7, ip, lsr #24
774 #else
775 orr r4, r4, r5, lsl #24
776 mov r5, r5, lsr #8
777 orr r5, r5, r6, lsl #24
778 mov r6, r6, lsr #8
779 orr r6, r6, r7, lsl #24
780 mov r7, r7, lsr #8
781 orr r7, r7, ip, lsl #24
782 #endif
783 strt r4, [r1], #0x04
784 strt r5, [r1], #0x04
785 strt r6, [r1], #0x04
786 strt r7, [r1], #0x04
787 .Lcopyout_bad1:
788 subs r2, r2, #0x10
789 bge .Lcopyout_bad1_loop16
790
791 adds r2, r2, #0x10
792 ldmeqfd sp!, {r4-r7}
793 RETeq /* Return now if done */
794 subs r2, r2, #0x04
795 sublt r0, r0, #0x03 /* Back r0 up to the first uncopied byte */
796 blt .Lcopyout_l4
797
798 .Lcopyout_bad1_loop4:
799 #ifdef __ARMEB__
800 mov r4, ip, lsl #8
801 #else
802 mov r4, ip, lsr #8
803 #endif
804 ldr ip, [r0], #0x04
805 subs r2, r2, #0x04
806 #ifdef __ARMEB__
807 orr r4, r4, ip, lsr #24
808 #else
809 orr r4, r4, ip, lsl #24
810 #endif
811 strt r4, [r1], #0x04
812 bge .Lcopyout_bad1_loop4
813 sub r0, r0, #0x03 /* Back r0 up to the first uncopied byte */
814 b .Lcopyout_l4
815
816 .Lcopyout_bad2_loop16:
817 #ifdef __ARMEB__
818 mov r4, ip, lsl #16
819 #else
820 mov r4, ip, lsr #16
821 #endif
822 ldr r5, [r0], #0x04
823 pld [r0, #0x018]
824 ldr r6, [r0], #0x04
825 ldr r7, [r0], #0x04
826 ldr ip, [r0], #0x04
827 #ifdef __ARMEB__
828 orr r4, r4, r5, lsr #16
829 mov r5, r5, lsl #16
830 orr r5, r5, r6, lsr #16
831 mov r6, r6, lsl #16
832 orr r6, r6, r7, lsr #16
833 mov r7, r7, lsl #16
834 orr r7, r7, ip, lsr #16
835 #else
836 orr r4, r4, r5, lsl #16
837 mov r5, r5, lsr #16
838 orr r5, r5, r6, lsl #16
839 mov r6, r6, lsr #16
840 orr r6, r6, r7, lsl #16
841 mov r7, r7, lsr #16
842 orr r7, r7, ip, lsl #16
843 #endif
844 strt r4, [r1], #0x04
845 strt r5, [r1], #0x04
846 strt r6, [r1], #0x04
847 strt r7, [r1], #0x04
848 .Lcopyout_bad2:
849 subs r2, r2, #0x10
850 bge .Lcopyout_bad2_loop16
851
852 adds r2, r2, #0x10
853 ldmeqfd sp!, {r4-r7}
854 RETeq /* Return now if done */
855 subs r2, r2, #0x04
856 sublt r0, r0, #0x02 /* Back r0 up to the first uncopied byte */
857 blt .Lcopyout_l4
858
859 .Lcopyout_bad2_loop4:
860 #ifdef __ARMEB__
861 mov r4, ip, lsl #16
862 #else
863 mov r4, ip, lsr #16
864 #endif
865 ldr ip, [r0], #0x04
866 subs r2, r2, #0x04
867 #ifdef __ARMEB__
868 orr r4, r4, ip, lsr #16
869 #else
870 orr r4, r4, ip, lsl #16
871 #endif
872 strt r4, [r1], #0x04
873 bge .Lcopyout_bad2_loop4
874 sub r0, r0, #0x02 /* Back r0 up to the first uncopied byte */
875 b .Lcopyout_l4
876
877 .Lcopyout_bad3_loop16:
878 #ifdef __ARMEB__
879 mov r4, ip, lsl #24
880 #else
881 mov r4, ip, lsr #24
882 #endif
883 ldr r5, [r0], #0x04
884 pld [r0, #0x018]
885 ldr r6, [r0], #0x04
886 ldr r7, [r0], #0x04
887 ldr ip, [r0], #0x04
888 #ifdef __ARMEB__
889 orr r4, r4, r5, lsr #8
890 mov r5, r5, lsl #24
891 orr r5, r5, r6, lsr #8
892 mov r6, r6, lsl #24
893 orr r6, r6, r7, lsr #8
894 mov r7, r7, lsl #24
895 orr r7, r7, ip, lsr #8
896 #else
897 orr r4, r4, r5, lsl #8
898 mov r5, r5, lsr #24
899 orr r5, r5, r6, lsl #8
900 mov r6, r6, lsr #24
901 orr r6, r6, r7, lsl #8
902 mov r7, r7, lsr #24
903 orr r7, r7, ip, lsl #8
904 #endif
905 strt r4, [r1], #0x04
906 strt r5, [r1], #0x04
907 strt r6, [r1], #0x04
908 strt r7, [r1], #0x04
909 .Lcopyout_bad3:
910 subs r2, r2, #0x10
911 bge .Lcopyout_bad3_loop16
912
913 adds r2, r2, #0x10
914 ldmeqfd sp!, {r4-r7}
915 RETeq /* Return now if done */
916 subs r2, r2, #0x04
917 sublt r0, r0, #0x01 /* Back r0 up to the first uncopied byte */
918 blt .Lcopyout_l4
919
920 .Lcopyout_bad3_loop4:
921 #ifdef __ARMEB__
922 mov r4, ip, lsl #24
923 #else
924 mov r4, ip, lsr #24
925 #endif
926 ldr ip, [r0], #0x04
927 subs r2, r2, #0x04
928 #ifdef __ARMEB__
929 orr r4, r4, ip, lsr #8
930 #else
931 orr r4, r4, ip, lsl #8
932 #endif
933 strt r4, [r1], #0x04
934 bge .Lcopyout_bad3_loop4
935 sub r0, r0, #0x01 /* Back r0 up to the first uncopied byte */
936
/* Common exit of the misaligned loops: r2 is (bytes left - 4) here */
937 .Lcopyout_l4:
938 ldmfd sp!, {r4-r7}
939 mov r3, #0x00 /* r4-r7 are no longer on the stack */
940 adds r2, r2, #0x04
941 RETeq
/*
 * Bytewise tail for 1-3 remaining bytes: skip (3 - r2) of the
 * load/store pairs below, each pair being 8 bytes of code.
 */
942 .Lcopyout_l4_2:
943 rsbs r2, r2, #0x03
944 addne pc, pc, r2, lsl #3
945 nop
946 ldrb ip, [r0], #0x01
947 strbt ip, [r1], #0x01
948 ldrb ip, [r0], #0x01
949 strbt ip, [r1], #0x01
950 ldrb ip, [r0]
951 strbt ip, [r1]
952 RET
Cache object: 1271c59b6dbe7dde0b31dc3d7ea844a6
|