1 /* $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $ */
2
3 /*-
4 * Copyright (c) 2002 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Allen Briggs for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38
39 #include "assym.s"
40
41 #include <machine/asm.h>
42
/*
 * PC-relative literals: each word holds the address of a variable.
 * copyin and copyout load the word and then dereference it to read
 * the current _arm_memcpy hook pointer and the _min_memcpy_size
 * threshold.
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)
47
48 __FBSDID("$FreeBSD$");
49 #ifdef _ARM_ARCH_5E
50 #include <arm/arm/bcopyinout_xscale.S>
51 #else
52
/* Code for the non-_ARM_ARCH_5E implementation starts here. */
	.text
	.align	0

/*
 * Literal used to locate the current PCB: the cpu_info table on
 * MULTIPROCESSOR builds, otherwise the address of the curpcb slot
 * inside __pcpu.
 */
#ifdef MULTIPROCESSOR
.Lcpu_info:
	.word	_C_LABEL(cpu_info)
#else
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB
#endif

/* Push / pop r4-r11, the registers copyin and copyout use as scratch. */
#define SAVE_REGS	stmfd	sp!, {r4-r11}
#define RESTORE_REGS	ldmfd	sp!, {r4-r11}

/*
 * PREFETCH() expands to nothing.  This code is only assembled when
 * _ARM_ARCH_5E is NOT defined (it sits in the #else arm of the
 * enclosing #ifdef _ARM_ARCH_5E), so the former
 * "#if defined(_ARM_ARCH_5E)" variant that emitted pld could never be
 * selected and has been dropped.  The macro is kept so the call sites
 * in copyin/copyout still assemble.
 */
#define PREFETCH(rx,o)
73
/*
 * copyin
 *
 *	r0 = user space address (source)
 *	r1 = kernel space address (destination)
 *	r2 = length in bytes
 *
 * Copies bytes from user space to kernel space.  Returns 0 in r0 on
 * success; the fault landing pad .Lcopyfault returns 14 (EFAULT).
 *
 * Register use on the open-coded path:
 *	r4	   = current PCB pointer
 *	r5	   = previous PCB_ONFAULT value, written back on every exit
 *	r3, r6-r11 = scratch
 * r4-r11 are pushed by SAVE_REGS and popped by RESTORE_REGS.
 *
 * Every user-side load is ldrt/ldrbt, i.e. performed with user-mode
 * permissions; kernel-side stores are ordinary str/strb/stmia.
 *
 * The jump tables rely on pc reading as "this instruction + 8": index 0
 * selects the first .word after the branch that follows the ldr.  That
 * branch is never executed; it only occupies the slot at +4.
 */
ENTRY(copyin)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/*
	 * If an _arm_memcpy hook is installed and the request is at
	 * least _min_memcpy_size bytes, let the hook try first.  It is
	 * called as hook(kernel dst, user src, len, SRC_IS_USER); a zero
	 * return means it did the copy.  Any other return falls through
	 * to the open-coded path with r0-r2 restored.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* swap r0/r1: hook wants (dst, src) */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2			/* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* return address = the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* ldm leaves the flags alone */
	moveq	r0, #0
	RETeq

.Lnormal:
	SAVE_REGS
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r4, .Lcpu_info
	ldr	r4, [r4, r0, lsl #2]
	ldr	r4, [r4, #CI_CURPCB]
	ldmfd	sp!, {r0-r2, r14}
#else
	ldr	r4, .Lcurpcb
	ldr	r4, [r4]
#endif

	/* Remember the old onfault hook in r5 and install .Lcopyfault. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align destination to word boundary: (r1 & 3) of 1, 2 or 3
	 * copies 3, 2 or 1 leading bytes respectively.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win.  If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 *
	 * r1 is word aligned here, so (r1 & 0x1f) is already a multiple
	 * of four and is used directly as the byte offset into the table.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Licaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/*
	 * Copy a cacheline.  Source words 0-1 go through r10/r11 and are
	 * stored first; r10/r11 are then reloaded with words 6-7 so that
	 * r6-r11 hold words 2-7 for the second stmia.
	 */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

	/* Two words per pass while at least eight bytes remain. */
.Licleanup8:
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.  Use copyin's own exit (.Liout) rather
	 * than the identical one inside copyout, so this routine does
	 * not depend on copyout's layout.
	 */
	cmp	r2, #0
	beq	.Liout

	/*
	 * Byte tail; r2 is non-zero on entry.  (r2 & 3) of 1, 2 or 3
	 * copies that many bytes, 0 copies four.  The subs at .Lic1 sets
	 * the flags (strb leaves them alone), so the bne at .Licend
	 * repeats until r2 reaches zero.
	 */
.Licleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Licend:
	bne	.Licleanup

	/* Success: return 0 with the previous onfault hook reinstated. */
.Liout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET

	/*
	 * Fault landing pad, installed in PCB_ONFAULT by both copyin and
	 * copyout.  Expects r4 = PCB and r5 = saved hook as set up above,
	 * with r4-r11 still pushed.  Presumably the abort handler resumes
	 * here when a user access faults -- confirm against the handler.
	 */
.Lcopyfault:
	mov	r0, #14 /* EFAULT */
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
318
/*
 * copyout
 *
 *	r0 = kernel space address (source)
 *	r1 = user space address (destination)
 *	r2 = length in bytes
 *
 * Copies bytes from kernel space to user space and returns 0 in r0.
 * PCB_ONFAULT is pointed at .Lcopyfault (defined with copyin) for the
 * duration of the open-coded copy.
 *
 * Open-coded path registers:
 *	r4	   = current PCB pointer
 *	r5	   = PCB_ONFAULT value found on entry, put back on exit
 *	r3, r6-r11 = temporaries
 * SAVE_REGS / RESTORE_REGS bracket the use of r4-r11.
 *
 * Kernel memory is read with plain ldr/ldrb/ldmia; every user-side
 * store is strt/strbt, i.e. done with user-mode permissions.
 *
 * Jump tables: pc reads as the ldr's address + 8, so table entry 0 is
 * the first .word after the (never executed) branch at +4.
 */

ENTRY(copyout)
	/* Zero-length request: succeed immediately. */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/*
	 * Give the _arm_memcpy hook first refusal when it is installed
	 * and the length reaches _min_memcpy_size.  Arguments are
	 * (user dst, kernel src, len, DST_IS_USER); a zero result means
	 * the copy was done.  Otherwise r0-r2 are restored and we carry
	 * on at .Lnormale.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* exchange r0 and r1 via r3 */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1			/* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* hook returns to the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* flags survive the ldm */
	moveq	r0, #0
	RETeq

.Lnormale:
	SAVE_REGS
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2, lr}
	bl	_C_LABEL(cpu_number)
	ldr	r4, .Lcpu_info
	ldr	r4, [r4, r0, lsl #2]
	ldr	r4, [r4, #CI_CURPCB]
	ldmfd	sp!, {r0-r2, lr}
#else
	ldr	r4, .Lcurpcb
	ldr	r4, [r4]
#endif

	/* r5 = old onfault hook; install .Lcopyfault in its place. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/* Fewer than eight bytes: straight to the byte loop. */
	cmp	r2, #8
	blt	.Lcleanup

	/*
	 * Bring the destination up to a word boundary.  (r1 & 3) of
	 * 1, 2, 3 enters at .Lal3, .Lal2, .Lal1 and so copies 3, 2, 1
	 * bytes; 0 skips to .Lalend.
	 */
	and	r6, r1, #3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/* The alignment bytes may have left too little for word copies. */
	cmp	r2, #8
	blt	.Lcleanup

	/* Word copies also need a word-aligned source. */
	ands	r3, r0, #3
	bne	.Lcleanup

	/* The unrolled 32-byte loop wants at least 0x60 bytes. */
	cmp	r2, #0x60
	blt	.Lcleanup8

	/*
	 * Advance word by word until the destination sits on a 32-byte
	 * boundary.  r1 is word aligned, so (r1 & 0x1f) is a multiple of
	 * four and indexes the table directly as a byte offset.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * More than 0x40 bytes remain here: at least 0x60 were required
	 * to get this far and alignment used up at most 0x1c.
	 *
	 * Shape of what follows:
	 *	do {
	 *		prefetch next line(s)
	 *		bytes -= 0x20
	 *		copy 32 bytes
	 *	} while (bytes >= 0x40);
	 *	bytes -= 0x20
	 *	copy 32 bytes
	 */
.Lcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/*
	 * 32 bytes: six words into r6-r11, store the first two, refill
	 * r6/r7 with words 6-7, then store words 2-7 in order.
	 */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Final 32-byte group, same sequence as inside the loop. */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #8
	blt	.Lprecleanup

	/* Eight bytes per pass for as long as eight or more remain. */
.Lcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/* Nothing left over: finished. */
	cmp	r2, #0
	beq	.Lout

	/*
	 * Trailing bytes (r2 != 0 here).  (r2 & 3) picks the entry:
	 * 1, 2, 3 copy that many bytes, 0 copies four.  Only the subs at
	 * .Lc1 updates the flags, and strbt does not disturb them, so the
	 * bne at .Lcend loops until the count is exhausted.
	 */
.Lcleanup:
	and	r6, r2, #3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1
.Lcend:
	bne	.Lcleanup

	/*
	 * Common success exit: r0 = 0, onfault hook restored, r4-r11
	 * popped.  copyin also branches here.
	 */
.Lout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
554 #endif
555
/*
 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 *
 *	r0 = src, r1 = dest
 *
 * Loads one byte from src and stores it to dest with PCB_ONFAULT
 * pointing at the exit label, then returns 0.  On the fault path this
 * code does not write r0 itself; presumably the abort handler supplies
 * EFAULT in r0 before resuming at label 1 -- confirm against the
 * handler.
 *
 * Temporaries: r2 = current PCB, r3 = scratch / data, ip = saved
 * PCB_ONFAULT value.
 */
ENTRY(badaddr_read_1)
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r1, lr}
	bl	_C_LABEL(cpu_number)
	ldr	r2, .Lcpu_info
	ldr	r2, [r2, r0, lsl #2]
	ldr	r2, [r2, #CI_CURPCB]
	ldmfd	sp!, {r0-r1, lr}
#else
	ldr	r2, .Lcurpcb
	ldr	r2, [r2]
#endif
	ldr	ip, [r2, #PCB_ONFAULT]	/* keep the previous hook */
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]	/* faults now resume at 1: */
	/*
	 * The nops around the load are kept exactly as found; their
	 * purpose is not stated in this file.
	 */
	nop
	nop
	nop
	ldrb	r3, [r0]		/* the probed access */
	nop
	nop
	nop
	strb	r3, [r1]
	mov	r0, #0			/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]	/* both paths restore the hook */
	RET
589
/*
 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 *
 *	r0 = src, r1 = dest
 *
 * Loads one halfword from src and stores it to dest with PCB_ONFAULT
 * pointing at the exit label, then returns 0.  On the fault path this
 * code does not write r0 itself; presumably the abort handler supplies
 * EFAULT in r0 before resuming at label 1 -- confirm against the
 * handler.
 *
 * Temporaries: r2 = current PCB, r3 = scratch / data, ip = saved
 * PCB_ONFAULT value.
 */
ENTRY(badaddr_read_2)
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r1, lr}
	bl	_C_LABEL(cpu_number)
	ldr	r2, .Lcpu_info
	ldr	r2, [r2, r0, lsl #2]
	ldr	r2, [r2, #CI_CURPCB]
	ldmfd	sp!, {r0-r1, lr}
#else
	ldr	r2, .Lcurpcb
	ldr	r2, [r2]
#endif
	ldr	ip, [r2, #PCB_ONFAULT]	/* keep the previous hook */
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]	/* faults now resume at 1: */
	/*
	 * The nops around the load are kept exactly as found; their
	 * purpose is not stated in this file.
	 */
	nop
	nop
	nop
	ldrh	r3, [r0]		/* the probed access */
	nop
	nop
	nop
	strh	r3, [r1]
	mov	r0, #0			/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]	/* both paths restore the hook */
	RET
623
/*
 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 *
 *	r0 = src, r1 = dest
 *
 * Loads one word from src and stores it to dest with PCB_ONFAULT
 * pointing at the exit label, then returns 0.  On the fault path this
 * code does not write r0 itself; presumably the abort handler supplies
 * EFAULT in r0 before resuming at label 1 -- confirm against the
 * handler.
 *
 * Temporaries: r2 = current PCB, r3 = scratch / data, ip = saved
 * PCB_ONFAULT value.
 */
ENTRY(badaddr_read_4)
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r1, lr}
	bl	_C_LABEL(cpu_number)
	ldr	r2, .Lcpu_info
	ldr	r2, [r2, r0, lsl #2]
	ldr	r2, [r2, #CI_CURPCB]
	ldmfd	sp!, {r0-r1, lr}
#else
	ldr	r2, .Lcurpcb
	ldr	r2, [r2]
#endif
	ldr	ip, [r2, #PCB_ONFAULT]	/* keep the previous hook */
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]	/* faults now resume at 1: */
	/*
	 * The nops around the load are kept exactly as found; their
	 * purpose is not stated in this file.
	 */
	nop
	nop
	nop
	ldr	r3, [r0]		/* the probed access */
	nop
	nop
	nop
	str	r3, [r1]
	mov	r0, #0			/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]	/* both paths restore the hook */
	RET
657
Cache object: 910a758e301e763043933024a01e7d78
|