1 /* $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $ */
2
3 /*-
4 * Copyright (c) 2002 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Allen Briggs for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38
39 #include "assym.s"
40
41 #include <machine/acle-compat.h>
42 #include <machine/asm.h>
43 #include <sys/errno.h>
44
45 .L_arm_memcpy: /* literal: address of _arm_memcpy, a word holding the optional copy hook (0 = none) */
46 .word _C_LABEL(_arm_memcpy)
47 .L_min_memcpy_size: /* literal: address of _min_memcpy_size, the smallest length handed to the hook */
48 .word _C_LABEL(_min_memcpy_size)
49
50 __FBSDID("$FreeBSD$");
51 #ifdef _ARM_ARCH_5E /* these builds take copyin/copyout from the included file instead */
52 #include <arm/arm/bcopyinout_xscale.S>
53 #else
54 /* Generic copyin/copyout; this branch is closed by the #endif that follows END(copyout). */
55 .text
56 .align 2
57 /* GET_PCB(tmp): leaves in tmp the address of a word holding the PCB pointer; callers follow it with "ldr tmp, [tmp]". */
58 #if __ARM_ARCH >= 6 /* value read from CP15 c13, c0, 4 plus TD_PCB (presumably the current thread pointer - confirm) */
59 #define GET_PCB(tmp) \
60 mrc p15, 0, tmp, c13, c0, 4; \
61 add tmp, tmp, #(TD_PCB)
62 #else /* otherwise: __pcpu + PC_CURPCB, loaded from the .Lcurpcb literal */
63 .Lcurpcb:
64 .word _C_LABEL(__pcpu) + PC_CURPCB
65
66 #define GET_PCB(tmp) \
67 ldr tmp, .Lcurpcb
68 #endif
69 /* SAVE_REGS / RESTORE_REGS push and pop r4-r11 on the full-descending stack; each SAVE_REGS is paired with a RESTORE_REGS before RET. */
70
71 #define SAVE_REGS stmfd sp!, {r4-r11}
72 #define RESTORE_REGS ldmfd sp!, {r4-r11}
73 /* PREFETCH(rx,o): "pld [rx, #o]" when _ARM_ARCH_5E is defined, otherwise empty. HELLOCPP expands to '#' so cpp does not treat it as the stringify operator. */
74 #if defined(_ARM_ARCH_5E) /* never true here: this code is inside the #else of "#ifdef _ARM_ARCH_5E" above */
75 #define HELLOCPP #
76 #define PREFETCH(rx,o) pld [ rx , HELLOCPP (o) ]
77 #else
78 #define PREFETCH(rx,o)
79 #endif
80
81 /*
82 * r0 = user space address (source)
83 * r1 = kernel space address (destination)
84 * r2 = length in bytes
85 *
86 * Copies bytes from user space to kernel space.
87 * Returns r0 = 0 on success, or EFAULT when the copy ends in .Lcopyfault.
88 * We save/restore r4-r11: r4 = PCB pointer, r5 = previous PCB_ONFAULT value,
89 * r6-r11 are scratch
90 */
91 ENTRY(copyin)
92 /* Quick exit if length is zero */
93 teq r2, #0
94 moveq r0, #0
95 RETeq
96 /* Try the optional _arm_memcpy hook when it is set and length >= _min_memcpy_size */
97 ldr r3, .L_arm_memcpy /* r3 = &_arm_memcpy */
98 ldr r3, [r3] /* r3 = hook function pointer */
99 cmp r3, #0
100 beq .Lnormal /* no hook installed */
101 ldr r3, .L_min_memcpy_size
102 ldr r3, [r3]
103 cmp r2, r3
104 blt .Lnormal /* signed compare: length below the hook threshold */
105 stmfd sp!, {r0-r2, r4, lr} /* keep our arguments for a possible fallback */
106 mov r3, r0 /* swap r0/r1: the hook is called as (kernel dst, user src, len, flags) */
107 mov r0, r1
108 mov r1, r3
109 mov r3, #2 /* SRC_IS_USER */
110 ldr r4, .L_arm_memcpy
111 mov lr, pc /* lr = address of the cmp below (pc reads as . + 8) */
112 ldr pc, [r4] /* indirect call through the hook */
113 cmp r0, #0
114 ldmfd sp!, {r0-r2, r4, lr} /* ldm leaves the flags from the cmp intact */
115 moveq r0, #0 /* hook returned 0: done */
116 RETeq
117 /* hook returned non-zero: fall through and copy by hand with the original arguments */
118 .Lnormal:
119 SAVE_REGS
120 GET_PCB(r4)
121 ldr r4, [r4] /* r4 = PCB pointer */
122
123
124 ldr r5, [r4, #PCB_ONFAULT] /* r5 = previous onfault value, restored on every exit */
125 adr r3, .Lcopyfault
126 str r3, [r4, #PCB_ONFAULT] /* install .Lcopyfault as the onfault handler */
127
128 PREFETCH(r0, 0)
129 PREFETCH(r1, 0)
130
131 /*
132 * If not too many bytes, take the slow path.
133 */
134 cmp r2, #0x08
135 blt .Licleanup /* fewer than 8 bytes: byte loop only */
136
137 /*
138 * Align destination to word boundary.
139 */
140 and r6, r1, #0x3
141 ldr pc, [pc, r6, lsl #2] /* pc reads as . + 8, i.e. the first .word below */
142 b .Lialend /* never reached; keeps the table at . + 8 of the ldr */
143 .word .Lialend /* dst & 3 == 0: already aligned */
144 .word .Lial3 /* dst & 3 == 1: copy 3 bytes */
145 .word .Lial2 /* dst & 3 == 2: copy 2 bytes */
146 .word .Lial1 /* dst & 3 == 3: copy 1 byte */
147 .Lial3: ldrbt r6, [r0], #1 /* ldrbt: byte load performed with user-mode permissions */
148 sub r2, r2, #1
149 strb r6, [r1], #1
150 .Lial2: ldrbt r7, [r0], #1
151 sub r2, r2, #1
152 strb r7, [r1], #1
153 .Lial1: ldrbt r6, [r0], #1
154 sub r2, r2, #1
155 strb r6, [r1], #1
156 .Lialend:
157
158 /*
159 * If few bytes left, finish slow.
160 */
161 cmp r2, #0x08
162 blt .Licleanup
163
164 /*
165 * If source is not aligned, finish slow.
166 */
167 ands r3, r0, #0x03
168 bne .Licleanup /* src not word aligned: the rest is copied bytewise */
169
170 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */
171 blt .Licleanup8 /* too short for the cacheline loop: 8 bytes per pass */
172
173 /*
174 * Align destination to cacheline boundary.
175 * If source and destination are nicely aligned, this can be a big
176 * win. If not, it's still cheaper to copy in groups of 32 even if
177 * we don't get the nice cacheline alignment.
178 */
179 and r6, r1, #0x1f
180 ldr pc, [pc, r6] /* dst is word aligned here, so r6 is already a multiple of 4 */
181 b .Licaligned /* never reached; keeps the table at . + 8 of the ldr */
182 .word .Licaligned
183 .word .Lical28
184 .word .Lical24
185 .word .Lical20
186 .word .Lical16
187 .word .Lical12
188 .word .Lical8
189 .word .Lical4
190 .Lical28:ldrt r6, [r0], #4 /* entries fall through: entering at .LicalNN copies NN bytes */
191 sub r2, r2, #4
192 str r6, [r1], #4
193 .Lical24:ldrt r7, [r0], #4
194 sub r2, r2, #4
195 str r7, [r1], #4
196 .Lical20:ldrt r6, [r0], #4
197 sub r2, r2, #4
198 str r6, [r1], #4
199 .Lical16:ldrt r7, [r0], #4
200 sub r2, r2, #4
201 str r7, [r1], #4
202 .Lical12:ldrt r6, [r0], #4
203 sub r2, r2, #4
204 str r6, [r1], #4
205 .Lical8:ldrt r7, [r0], #4
206 sub r2, r2, #4
207 str r7, [r1], #4
208 .Lical4:ldrt r6, [r0], #4
209 sub r2, r2, #4
210 str r6, [r1], #4
211
212 /*
213 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
214 * part of the code, and we may have knocked that down by as much
215 * as 0x1c getting aligned).
216 *
217 * This loop basically works out to:
218 * do {
219 * prefetch-next-cacheline(s)
220 * bytes -= 0x20;
221 * copy cacheline
222 * } while (bytes >= 0x40);
223 * bytes -= 0x20;
224 * copy cacheline
225 */
226 .Licaligned:
227 PREFETCH(r0, 32)
228 PREFETCH(r1, 32)
229
230 sub r2, r2, #0x20
231
232 /* Copy a cacheline */
233 ldrt r10, [r0], #4 /* words 0-1 go to r10/r11 */
234 ldrt r11, [r0], #4
235 ldrt r6, [r0], #4 /* words 2-5 go to r6-r9 */
236 ldrt r7, [r0], #4
237 ldrt r8, [r0], #4
238 ldrt r9, [r0], #4
239 stmia r1!, {r10-r11} /* store words 0-1 so r10/r11 can be reused */
240 ldrt r10, [r0], #4 /* words 6-7 */
241 ldrt r11, [r0], #4
242 stmia r1!, {r6-r11} /* store words 2-7 */
243
244 cmp r2, #0x40
245 bge .Licaligned
246
247 sub r2, r2, #0x20
248
249 /* Copy a cacheline */
250 ldrt r10, [r0], #4
251 ldrt r11, [r0], #4
252 ldrt r6, [r0], #4
253 ldrt r7, [r0], #4
254 ldrt r8, [r0], #4
255 ldrt r9, [r0], #4
256 stmia r1!, {r10-r11}
257 ldrt r10, [r0], #4
258 ldrt r11, [r0], #4
259 stmia r1!, {r6-r11}
260
261 cmp r2, #0x08
262 blt .Liprecleanup
263
264 .Licleanup8:
265 ldrt r8, [r0], #4
266 ldrt r9, [r0], #4
267 sub r2, r2, #8
268 stmia r1!, {r8, r9}
269 cmp r2, #8
270 bge .Licleanup8
271
272 .Liprecleanup:
273 /*
274 * If we're done, bail.
275 */
276 cmp r2, #0
277 beq .Liout /* leave through copyin's own epilogue */
278
279 .Licleanup:
280 and r6, r2, #0x3
281 ldr pc, [pc, r6, lsl #2]
282 b .Licend /* never reached; keeps the table at . + 8 of the ldr */
283 .word .Lic4 /* r2 & 3 == 0: r2 is non-zero here, so copy 4 bytes */
284 .word .Lic1
285 .word .Lic2
286 .word .Lic3
287 .Lic4: ldrbt r6, [r0], #1
288 sub r2, r2, #1
289 strb r6, [r1], #1
290 .Lic3: ldrbt r7, [r0], #1
291 sub r2, r2, #1
292 strb r7, [r1], #1
293 .Lic2: ldrbt r6, [r0], #1
294 sub r2, r2, #1
295 strb r6, [r1], #1
296 .Lic1: ldrbt r7, [r0], #1
297 subs r2, r2, #1 /* only this sub sets flags: Z tells .Licend whether bytes remain */
298 strb r7, [r1], #1
299 .Licend:
300 bne .Licleanup
301
302 .Liout:
303 mov r0, #0 /* success */
304
305 str r5, [r4, #PCB_ONFAULT] /* put back the previous onfault value */
306 RESTORE_REGS
307
308 RET
309
310 .Lcopyfault: /* installed in PCB_ONFAULT by copyin and by copyout; presumably entered by the abort handler - confirm */
311 ldr r0, =EFAULT
312 str r5, [r4, #PCB_ONFAULT]
313 RESTORE_REGS
314
315 RET
316 END(copyin)
317
318 /*
319 * r0 = kernel space address (source)
320 * r1 = user space address (destination)
321 * r2 = length in bytes
322 *
323 * Copies bytes from kernel space to user space.
324 * Returns r0 = 0 on success, or EFAULT when the copy ends in .Lcopyfault.
325 * We save/restore r4-r11: r4 = PCB pointer, r5 = previous PCB_ONFAULT value,
326 * r6-r11 are scratch
327 */
328
329 ENTRY(copyout)
330 /* Quick exit if length is zero */
331 teq r2, #0
332 moveq r0, #0
333 RETeq
334 /* Try the optional _arm_memcpy hook when it is set and length >= _min_memcpy_size */
335 ldr r3, .L_arm_memcpy /* r3 = &_arm_memcpy */
336 ldr r3, [r3] /* r3 = hook function pointer */
337 cmp r3, #0
338 beq .Lnormale /* no hook installed */
339 ldr r3, .L_min_memcpy_size
340 ldr r3, [r3]
341 cmp r2, r3
342 blt .Lnormale /* signed compare: length below the hook threshold */
343 stmfd sp!, {r0-r2, r4, lr} /* keep our arguments for a possible fallback */
344 mov r3, r0 /* swap r0/r1: the hook is called as (user dst, kernel src, len, flags) */
345 mov r0, r1
346 mov r1, r3
347 mov r3, #1 /* DST_IS_USER */
348 ldr r4, .L_arm_memcpy
349 mov lr, pc /* lr = address of the cmp below (pc reads as . + 8) */
350 ldr pc, [r4] /* indirect call through the hook */
351 cmp r0, #0
352 ldmfd sp!, {r0-r2, r4, lr} /* ldm leaves the flags from the cmp intact */
353 moveq r0, #0 /* hook returned 0: done */
354 RETeq
355 /* hook returned non-zero: fall through and copy by hand with the original arguments */
356 .Lnormale:
357 SAVE_REGS
358 GET_PCB(r4)
359 ldr r4, [r4] /* r4 = PCB pointer */
360
361 ldr r5, [r4, #PCB_ONFAULT] /* r5 = previous onfault value, restored on every exit */
362 adr r3, .Lcopyfault /* the handler body is located inside copyin */
363 str r3, [r4, #PCB_ONFAULT]
364
365 PREFETCH(r0, 0)
366 PREFETCH(r1, 0)
367
368 /*
369 * If not too many bytes, take the slow path.
370 */
371 cmp r2, #0x08
372 blt .Lcleanup /* fewer than 8 bytes: byte loop only */
373
374 /*
375 * Align destination to word boundary.
376 */
377 and r6, r1, #0x3
378 ldr pc, [pc, r6, lsl #2] /* pc reads as . + 8, i.e. the first .word below */
379 b .Lalend /* never reached; keeps the table at . + 8 of the ldr */
380 .word .Lalend /* dst & 3 == 0: already aligned */
381 .word .Lal3 /* dst & 3 == 1: copy 3 bytes */
382 .word .Lal2 /* dst & 3 == 2: copy 2 bytes */
383 .word .Lal1 /* dst & 3 == 3: copy 1 byte */
384 .Lal3: ldrb r6, [r0], #1
385 sub r2, r2, #1
386 strbt r6, [r1], #1 /* strbt: byte store performed with user-mode permissions */
387 .Lal2: ldrb r7, [r0], #1
388 sub r2, r2, #1
389 strbt r7, [r1], #1
390 .Lal1: ldrb r6, [r0], #1
391 sub r2, r2, #1
392 strbt r6, [r1], #1
393 .Lalend:
394
395 /*
396 * If few bytes left, finish slow.
397 */
398 cmp r2, #0x08
399 blt .Lcleanup
400
401 /*
402 * If source is not aligned, finish slow.
403 */
404 ands r3, r0, #0x03
405 bne .Lcleanup /* src not word aligned: the rest is copied bytewise */
406
407 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */
408 blt .Lcleanup8 /* too short for the cacheline loop: 8 bytes per pass */
409
410 /*
411 * Align destination to cacheline boundary (the table is indexed by r1 only).
412 */
413 and r6, r1, #0x1f
414 ldr pc, [pc, r6] /* dst is word aligned here, so r6 is already a multiple of 4 */
415 b .Lcaligned /* never reached; keeps the table at . + 8 of the ldr */
416 .word .Lcaligned
417 .word .Lcal28
418 .word .Lcal24
419 .word .Lcal20
420 .word .Lcal16
421 .word .Lcal12
422 .word .Lcal8
423 .word .Lcal4
424 .Lcal28:ldr r6, [r0], #4 /* entries fall through: entering at .LcalNN copies NN bytes */
425 sub r2, r2, #4
426 strt r6, [r1], #4
427 .Lcal24:ldr r7, [r0], #4
428 sub r2, r2, #4
429 strt r7, [r1], #4
430 .Lcal20:ldr r6, [r0], #4
431 sub r2, r2, #4
432 strt r6, [r1], #4
433 .Lcal16:ldr r7, [r0], #4
434 sub r2, r2, #4
435 strt r7, [r1], #4
436 .Lcal12:ldr r6, [r0], #4
437 sub r2, r2, #4
438 strt r6, [r1], #4
439 .Lcal8: ldr r7, [r0], #4
440 sub r2, r2, #4
441 strt r7, [r1], #4
442 .Lcal4: ldr r6, [r0], #4
443 sub r2, r2, #4
444 strt r6, [r1], #4
445
446 /*
447 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
448 * part of the code, and we may have knocked that down by as much
449 * as 0x1c getting aligned).
450 *
451 * This loop basically works out to:
452 * do {
453 * prefetch-next-cacheline(s)
454 * bytes -= 0x20;
455 * copy cacheline
456 * } while (bytes >= 0x40);
457 * bytes -= 0x20;
458 * copy cacheline
459 */
460 .Lcaligned:
461 PREFETCH(r0, 32)
462 PREFETCH(r1, 32)
463
464 sub r2, r2, #0x20
465
466 /* Copy a cacheline */
467 ldmia r0!, {r6-r11} /* words 0-5 */
468 strt r6, [r1], #4
469 strt r7, [r1], #4
470 ldmia r0!, {r6-r7} /* words 6-7, reusing r6/r7 once words 0-1 are stored */
471 strt r8, [r1], #4
472 strt r9, [r1], #4
473 strt r10, [r1], #4
474 strt r11, [r1], #4
475 strt r6, [r1], #4
476 strt r7, [r1], #4
477
478 cmp r2, #0x40
479 bge .Lcaligned
480
481 sub r2, r2, #0x20
482
483 /* Copy a cacheline */
484 ldmia r0!, {r6-r11}
485 strt r6, [r1], #4
486 strt r7, [r1], #4
487 ldmia r0!, {r6-r7}
488 strt r8, [r1], #4
489 strt r9, [r1], #4
490 strt r10, [r1], #4
491 strt r11, [r1], #4
492 strt r6, [r1], #4
493 strt r7, [r1], #4
494
495 cmp r2, #0x08
496 blt .Lprecleanup
497
498 .Lcleanup8:
499 ldmia r0!, {r8-r9}
500 sub r2, r2, #8
501 strt r8, [r1], #4
502 strt r9, [r1], #4
503 cmp r2, #8
504 bge .Lcleanup8
505
506 .Lprecleanup:
507 /*
508 * If we're done, bail.
509 */
510 cmp r2, #0
511 beq .Lout
512
513 .Lcleanup:
514 and r6, r2, #0x3
515 ldr pc, [pc, r6, lsl #2]
516 b .Lcend /* never reached; keeps the table at . + 8 of the ldr */
517 .word .Lc4 /* r2 & 3 == 0: r2 is non-zero here, so copy 4 bytes */
518 .word .Lc1
519 .word .Lc2
520 .word .Lc3
521 .Lc4: ldrb r6, [r0], #1
522 sub r2, r2, #1
523 strbt r6, [r1], #1
524 .Lc3: ldrb r7, [r0], #1
525 sub r2, r2, #1
526 strbt r7, [r1], #1
527 .Lc2: ldrb r6, [r0], #1
528 sub r2, r2, #1
529 strbt r6, [r1], #1
530 .Lc1: ldrb r7, [r0], #1
531 subs r2, r2, #1 /* only this sub sets flags: Z tells .Lcend whether bytes remain */
532 strbt r7, [r1], #1
533 .Lcend:
534 bne .Lcleanup
535
536 .Lout:
537 mov r0, #0 /* success */
538
539 str r5, [r4, #PCB_ONFAULT] /* put back the previous onfault value */
540 RESTORE_REGS
541
542 RET
543 END(copyout)
544 #endif
545
546 /*
547 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
548 *
549 * Copies a single 8-bit value from src to dest, returning 0 on success,
550 * else EFAULT if a page fault occurred.
551 */
552 ENTRY(badaddr_read_1)
553 GET_PCB(r2)
554 ldr r2, [r2] /* r2 = PCB pointer */
555 /* NOTE(review): r0 is never written on the fault path; EFAULT is presumably left in r0 by the abort handler before it resumes at 1: - confirm */
556 ldr ip, [r2, #PCB_ONFAULT] /* ip = previous onfault value */
557 adr r3, 1f
558 str r3, [r2, #PCB_ONFAULT] /* label 1: below becomes the onfault target */
559 nop /* NOTE(review): purpose of the nop padding around the load is not shown here - presumably lets a pending abort surface */
560 nop
561 nop
562 ldrb r3, [r0] /* the probed access: byte load from src */
563 nop
564 nop
565 nop
566 strb r3, [r1] /* *dest = value read */
567 mov r0, #0 /* No fault */
568 1: str ip, [r2, #PCB_ONFAULT] /* restore the previous onfault value on both paths */
569 RET
570 END(badaddr_read_1)
571
572 /*
573 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
574 *
575 * Copies a single 16-bit value from src to dest, returning 0 on success,
576 * else EFAULT if a page fault occurred.
577 */
578 ENTRY(badaddr_read_2)
579 GET_PCB(r2)
580 ldr r2, [r2] /* r2 = PCB pointer */
581 /* NOTE(review): r0 is never written on the fault path; EFAULT is presumably left in r0 by the abort handler before it resumes at 1: - confirm */
582 ldr ip, [r2, #PCB_ONFAULT] /* ip = previous onfault value */
583 adr r3, 1f
584 str r3, [r2, #PCB_ONFAULT] /* label 1: below becomes the onfault target */
585 nop /* NOTE(review): purpose of the nop padding around the load is not shown here - presumably lets a pending abort surface */
586 nop
587 nop
588 ldrh r3, [r0] /* the probed access: halfword load from src */
589 nop
590 nop
591 nop
592 strh r3, [r1] /* *dest = value read */
593 mov r0, #0 /* No fault */
594 1: str ip, [r2, #PCB_ONFAULT] /* restore the previous onfault value on both paths */
595 RET
596 END(badaddr_read_2)
597
598 /*
599 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
600 *
601 * Copies a single 32-bit value from src to dest, returning 0 on success,
602 * else EFAULT if a page fault occurred.
603 */
604 ENTRY(badaddr_read_4)
605 GET_PCB(r2)
606 ldr r2, [r2] /* r2 = PCB pointer */
607 /* NOTE(review): r0 is never written on the fault path; EFAULT is presumably left in r0 by the abort handler before it resumes at 1: - confirm */
608 ldr ip, [r2, #PCB_ONFAULT] /* ip = previous onfault value */
609 adr r3, 1f
610 str r3, [r2, #PCB_ONFAULT] /* label 1: below becomes the onfault target */
611 nop /* NOTE(review): purpose of the nop padding around the load is not shown here - presumably lets a pending abort surface */
612 nop
613 nop
614 ldr r3, [r0] /* the probed access: word load from src */
615 nop
616 nop
617 nop
618 str r3, [r1] /* *dest = value read */
619 mov r0, #0 /* No fault */
620 1: str ip, [r2, #PCB_ONFAULT] /* restore the previous onfault value on both paths */
621 RET
622 END(badaddr_read_4)
623
Cache object: 5a802186ee283c16417419ad3d7db3b6
|