1 /* $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $ */
2
3 /*-
4 * Copyright (c) 2002 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Allen Briggs for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38
39 #include "assym.s"
40
41 #include <machine/asm.h>
42 #include <sys/errno.h>
43
/*
 * Literal words holding the addresses of the _arm_memcpy and
 * _min_memcpy_size variables.  copyin and copyout load these words
 * pc-relative and then dereference them to read the variables.
 */
44 .L_arm_memcpy:
45 .word _C_LABEL(_arm_memcpy)
46 .L_min_memcpy_size:
47 .word _C_LABEL(_min_memcpy_size)
48
49 __FBSDID("$FreeBSD: releng/10.0/sys/arm/arm/bcopyinout.S 248361 2013-03-16 02:48:49Z andrew $");
/*
 * With _ARM_ARCH_5E defined, copyin/copyout come from the included file;
 * otherwise the generic versions between the #else below and its
 * matching #endif (after copyout) are assembled.
 */
50 #ifdef _ARM_ARCH_5E
51 #include <arm/arm/bcopyinout_xscale.S>
52 #else
53
54 .text
55 .align 0
56
/*
 * GET_PCB(tmp): leave in tmp the ADDRESS of the word at offset PC_CURPCB
 * (by its name, the current-PCB pointer).  Every user in this file
 * follows it with "ldr tmp, [tmp]" to obtain the PCB pointer itself.
 *
 *   _ARM_ARCH_6: the base comes from CP15 c13, c0, 4.
 *                NOTE(review): presumably that register holds the per-CPU
 *                data pointer -- confirm against the CPU setup code.
 *   otherwise:   the address is the link-time constant
 *                __pcpu + PC_CURPCB, kept in the .Lcurpcb literal.
 */
57 #ifdef _ARM_ARCH_6
58 #define GET_PCB(tmp) \
59 mrc p15, 0, tmp, c13, c0, 4; \
60 add tmp, tmp, #(PC_CURPCB)
61 #else
62 .Lcurpcb:
63 .word _C_LABEL(__pcpu) + PC_CURPCB
64
65 #define GET_PCB(tmp) \
66 ldr tmp, .Lcurpcb
67 #endif
68
69
/* Push / pop r4-r11, the working registers of the copy routines. */
70 #define SAVE_REGS stmfd sp!, {r4-r11}
71 #define RESTORE_REGS ldmfd sp!, {r4-r11}
72
/*
 * PREFETCH(rx, o): "pld [rx, #o]" on _ARM_ARCH_5E, nothing otherwise.
 * This test sits inside the #else arm of "#ifdef _ARM_ARCH_5E" above, so
 * within this file only the empty definition is ever selected.
 * NOTE(review): HELLOCPP presumably exists so the macro body can emit a
 * literal '#' without cpp treating it as the stringify operator.
 */
73 #if defined(_ARM_ARCH_5E)
74 #define HELLOCPP #
75 #define PREFETCH(rx,o) pld [ rx , HELLOCPP (o) ]
76 #else
77 #define PREFETCH(rx,o)
78 #endif
79
80 /*
81 * r0 = user space address
82 * r1 = kernel space address
83 * r2 = length
84 *
85 * Copies bytes from user space to kernel space
86 *
87 * We save/restore r4-r11:
88 * r4-r11 are scratch
89 */
/*
 * Result in r0: 0 on success, EFAULT if .Lcopyfault is entered.
 *
 * Register roles on the normal path:
 *   r0      source cursor (user), read with ldrt/ldrbt, i.e. loads that
 *           are performed with user-mode permissions
 *   r1      destination cursor (kernel), written with strb/str/stmia
 *   r2      bytes remaining
 *   r4      PCB pointer
 *   r5      pcb_onfault value found on entry, restored on every exit
 *   r3, r6-r11  temporaries
 *
 * Strategy: fewer than 8 bytes, or a source that is not word aligned once
 * the destination is, are copied bytewise; otherwise 8 bytes per
 * iteration, and from 0x60 bytes up 32 bytes per iteration after aligning
 * the destination to 32 bytes.
 *
 * NOTE(review): every length test uses a signed condition (blt/bge), so a
 * length with bit 31 set is treated as negative -- confirm callers never
 * pass one.
 */
90 ENTRY(copyin)
91 /* Quick exit if length is zero */
92 teq r2, #0
93 moveq r0, #0
94 RETeq
95
/*
 * Optional hook: when the _arm_memcpy pointer is non-NULL and the length
 * is at least _min_memcpy_size, try the hook before copying here.
 */
96 ldr r3, .L_arm_memcpy
97 ldr r3, [r3]
98 cmp r3, #0
99 beq .Lnormal
100 ldr r3, .L_min_memcpy_size
101 ldr r3, [r3]
102 cmp r2, r3
103 blt .Lnormal
/*
 * Call (*_arm_memcpy)(r0 = kernel dst, r1 = user src, r2 = len, r3 = 2).
 * The original r0-r2 are stacked so they can be restored for the
 * fallback; r4 is stacked because it carries the pointer for the call.
 * "mov lr, pc" yields the address of the cmp below, since pc reads two
 * instructions ahead.
 * NOTE(review): five words are pushed, so sp is 4-byte but not 8-byte
 * aligned during the call if it was 8-byte aligned on entry -- confirm
 * the hook implementations tolerate that.
 */
104 stmfd sp!, {r0-r2, r4, lr}
105 mov r3, r0
106 mov r0, r1
107 mov r1, r3
108 mov r3, #2 /* SRC_IS_USER */
109 ldr r4, .L_arm_memcpy
110 mov lr, pc
111 ldr pc, [r4]
112 cmp r0, #0
/* ldmfd does not touch the flags, so the "eq" tests still see the cmp. */
113 ldmfd sp!, {r0-r2, r4, lr}
114 moveq r0, #0
115 RETeq
/* Hook returned non-zero: fall through and copy here with the
 * original arguments. */
116
117 .Lnormal:
118 SAVE_REGS
119 GET_PCB(r4)
120 ldr r4, [r4]
121
122
/*
 * Arm the fault hook: keep the previous pcb_onfault in r5 and install
 * .Lcopyfault.  NOTE(review): presumably the data-abort handler resumes
 * at pcb_onfault when one of the user loads faults -- confirm.
 */
123 ldr r5, [r4, #PCB_ONFAULT]
124 adr r3, .Lcopyfault
125 str r3, [r4, #PCB_ONFAULT]
126
/* PREFETCH expands to nothing whenever this generic code is assembled. */
127 PREFETCH(r0, 0)
128 PREFETCH(r1, 0)
129
130 /*
131 * If not too many bytes, take the slow path.
132 */
133 cmp r2, #0x08
134 blt .Licleanup
135
136 /*
137 * Align destination to word boundary.
138 */
/*
 * Computed jump on (r1 & 3).  pc reads as this ldr + 8, so the table
 * base is the first .word; the "b" merely occupies the slot at pc + 4.
 * Entry k lands in the ladder so that (4 - k) & 3 bytes are copied.
 */
139 and r6, r1, #0x3
140 ldr pc, [pc, r6, lsl #2]
141 b .Lialend
142 .word .Lialend
143 .word .Lial3
144 .word .Lial2
145 .word .Lial1
146 .Lial3: ldrbt r6, [r0], #1
147 sub r2, r2, #1
148 strb r6, [r1], #1
149 .Lial2: ldrbt r7, [r0], #1
150 sub r2, r2, #1
151 strb r7, [r1], #1
152 .Lial1: ldrbt r6, [r0], #1
153 sub r2, r2, #1
154 strb r6, [r1], #1
155 .Lialend:
156
157 /*
158 * If few bytes left, finish slow.
159 */
160 cmp r2, #0x08
161 blt .Licleanup
162
163 /*
164 * If source is not aligned, finish slow.
165 */
166 ands r3, r0, #0x03
167 bne .Licleanup
168
169 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */
170 blt .Licleanup8
171
172 /*
173 * Align destination to cacheline boundary.
174 * If source and destination are nicely aligned, this can be a big
175 * win. If not, it's still cheaper to copy in groups of 32 even if
176 * we don't get the nice cacheline alignment.
177 */
/*
 * r1 is word aligned here, so (r1 & 0x1f) is already a multiple of 4 and
 * serves unscaled as the byte offset into the table.  Offset n lands in
 * the ladder so that (32 - n) & 31 bytes are copied, one word at a time.
 */
178 and r6, r1, #0x1f
179 ldr pc, [pc, r6]
180 b .Licaligned
181 .word .Licaligned
182 .word .Lical28
183 .word .Lical24
184 .word .Lical20
185 .word .Lical16
186 .word .Lical12
187 .word .Lical8
188 .word .Lical4
189 .Lical28:ldrt r6, [r0], #4
190 sub r2, r2, #4
191 str r6, [r1], #4
192 .Lical24:ldrt r7, [r0], #4
193 sub r2, r2, #4
194 str r7, [r1], #4
195 .Lical20:ldrt r6, [r0], #4
196 sub r2, r2, #4
197 str r6, [r1], #4
198 .Lical16:ldrt r7, [r0], #4
199 sub r2, r2, #4
200 str r7, [r1], #4
201 .Lical12:ldrt r6, [r0], #4
202 sub r2, r2, #4
203 str r6, [r1], #4
204 .Lical8:ldrt r7, [r0], #4
205 sub r2, r2, #4
206 str r7, [r1], #4
207 .Lical4:ldrt r6, [r0], #4
208 sub r2, r2, #4
209 str r6, [r1], #4
210
211 /*
212 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
213 * part of the code, and we may have knocked that down by as much
214 * as 0x1c getting aligned).
215 *
216 * This loop basically works out to:
217 * do {
218 * prefetch-next-cacheline(s)
219 * bytes -= 0x20;
220 * copy cacheline
221 * } while (bytes >= 0x40);
222 * bytes -= 0x20;
223 * copy cacheline
224 */
225 .Licaligned:
226 PREFETCH(r0, 32)
227 PREFETCH(r1, 32)
228
229 sub r2, r2, #0x20
230
231 /* Copy a cacheline */
/*
 * Eight user words are loaded one by one; r10/r11 are used twice, so the
 * first two words are stored before they are reloaded for words 7 and 8.
 */
232 ldrt r10, [r0], #4
233 ldrt r11, [r0], #4
234 ldrt r6, [r0], #4
235 ldrt r7, [r0], #4
236 ldrt r8, [r0], #4
237 ldrt r9, [r0], #4
238 stmia r1!, {r10-r11}
239 ldrt r10, [r0], #4
240 ldrt r11, [r0], #4
241 stmia r1!, {r6-r11}
242
243 cmp r2, #0x40
244 bge .Licaligned
245
246 sub r2, r2, #0x20
247
248 /* Copy a cacheline */
249 ldrt r10, [r0], #4
250 ldrt r11, [r0], #4
251 ldrt r6, [r0], #4
252 ldrt r7, [r0], #4
253 ldrt r8, [r0], #4
254 ldrt r9, [r0], #4
255 stmia r1!, {r10-r11}
256 ldrt r10, [r0], #4
257 ldrt r11, [r0], #4
258 stmia r1!, {r6-r11}
259
260 cmp r2, #0x08
261 blt .Liprecleanup
262
/* Two words per iteration while at least 8 bytes remain. */
263 .Licleanup8:
264 ldrt r8, [r0], #4
265 ldrt r9, [r0], #4
266 sub r2, r2, #8
267 stmia r1!, {r8, r9}
268 cmp r2, #8
269 bge .Licleanup8
270
271 .Liprecleanup:
272 /*
273 * If we're done, bail.
274 */
/*
 * Leave through copyin's own epilogue.  This branch used to target
 * .Lout, the equivalent epilogue inside copyout, which made copyin jump
 * across its END() into another function and left .Liout unreferenced.
 */
275 cmp r2, #0
276 beq .Liout
277
/*
 * Byte tail.  Computed jump on (r2 & 3): 1, 2 or 3 copy that many bytes,
 * 0 copies four.  r2 is non-zero on every path that reaches this label.
 * Only the subs in .Lic1 sets flags (strb leaves them alone), so the bne
 * at .Licend repeats the ladder until r2 reaches zero.
 */
278 .Licleanup:
279 and r6, r2, #0x3
280 ldr pc, [pc, r6, lsl #2]
281 b .Licend
282 .word .Lic4
283 .word .Lic1
284 .word .Lic2
285 .word .Lic3
286 .Lic4: ldrbt r6, [r0], #1
287 sub r2, r2, #1
288 strb r6, [r1], #1
289 .Lic3: ldrbt r7, [r0], #1
290 sub r2, r2, #1
291 strb r7, [r1], #1
292 .Lic2: ldrbt r6, [r0], #1
293 sub r2, r2, #1
294 strb r6, [r1], #1
295 .Lic1: ldrbt r7, [r0], #1
296 subs r2, r2, #1
297 strb r7, [r1], #1
298 .Licend:
299 bne .Licleanup
300
/* Success: return 0 after restoring pcb_onfault and r4-r11. */
301 .Liout:
302 mov r0, #0
303
304 str r5, [r4, #PCB_ONFAULT]
305 RESTORE_REGS
306
307 RET
308
/*
 * Fault target installed in pcb_onfault.  copyout installs this same
 * label, so it may rely only on r4 (PCB), r5 (saved pcb_onfault) and the
 * SAVE_REGS frame being on the stack.
 */
309 .Lcopyfault:
310 ldr r0, =EFAULT
311 str r5, [r4, #PCB_ONFAULT]
312 RESTORE_REGS
313
314 RET
315 END(copyin)
316
317 /*
318 * r0 = kernel space address
319 * r1 = user space address
320 * r2 = length
321 *
322 * Copies bytes from kernel space to user space
323 *
324 * We save/restore r4-r11:
325 * r4-r11 are scratch
326 */
/*
 * Result in r0: 0 on success; on a fault control goes to .Lcopyfault
 * (defined inside copyin), which returns EFAULT.
 *
 * Register roles on the normal path:
 *   r0      source cursor (kernel), read with ldrb/ldr/ldmia
 *   r1      destination cursor (user), written with strt/strbt, i.e.
 *           stores that are performed with user-mode permissions
 *   r2      bytes remaining
 *   r4      PCB pointer
 *   r5      pcb_onfault value found on entry, restored on exit
 *   r3, r6-r11  temporaries
 *
 * NOTE(review): every length test uses a signed condition (blt/bge), so a
 * length with bit 31 set is treated as negative -- confirm callers never
 * pass one.
 */
327
328 ENTRY(copyout)
329 /* Quick exit if length is zero */
330 teq r2, #0
331 moveq r0, #0
332 RETeq
333
/*
 * Optional hook: when the _arm_memcpy pointer is non-NULL and the length
 * is at least _min_memcpy_size, try the hook before copying here.
 */
334 ldr r3, .L_arm_memcpy
335 ldr r3, [r3]
336 cmp r3, #0
337 beq .Lnormale
338 ldr r3, .L_min_memcpy_size
339 ldr r3, [r3]
340 cmp r2, r3
341 blt .Lnormale
/*
 * Call (*_arm_memcpy)(r0 = user dst, r1 = kernel src, r2 = len, r3 = 1).
 * The original r0-r2 are stacked so they can be restored for the
 * fallback; "mov lr, pc" yields the address of the cmp below, since pc
 * reads two instructions ahead.
 * NOTE(review): five words are pushed, so sp is 4-byte but not 8-byte
 * aligned during the call if it was 8-byte aligned on entry -- confirm
 * the hook implementations tolerate that.
 */
342 stmfd sp!, {r0-r2, r4, lr}
343 mov r3, r0
344 mov r0, r1
345 mov r1, r3
346 mov r3, #1 /* DST_IS_USER */
347 ldr r4, .L_arm_memcpy
348 mov lr, pc
349 ldr pc, [r4]
350 cmp r0, #0
/* ldmfd does not touch the flags, so the "eq" tests still see the cmp. */
351 ldmfd sp!, {r0-r2, r4, lr}
352 moveq r0, #0
353 RETeq
/* Hook returned non-zero: fall through and copy here with the
 * original arguments. */
354
355 .Lnormale:
356 SAVE_REGS
357 GET_PCB(r4)
358 ldr r4, [r4]
359
/*
 * Arm the fault hook: keep the previous pcb_onfault in r5 and install
 * .Lcopyfault, the handler that lives in copyin.  adr is pc-relative, so
 * that label has to stay within adr range of this instruction.
 * NOTE(review): presumably the data-abort handler resumes at pcb_onfault
 * when one of the user stores faults -- confirm.
 */
360 ldr r5, [r4, #PCB_ONFAULT]
361 adr r3, .Lcopyfault
362 str r3, [r4, #PCB_ONFAULT]
363
/* PREFETCH expands to nothing whenever this generic code is assembled. */
364 PREFETCH(r0, 0)
365 PREFETCH(r1, 0)
366
367 /*
368 * If not too many bytes, take the slow path.
369 */
370 cmp r2, #0x08
371 blt .Lcleanup
372
373 /*
374 * Align destination to word boundary.
375 */
/*
 * Computed jump on (r1 & 3).  pc reads as this ldr + 8, so the table
 * base is the first .word; the "b" merely occupies the slot at pc + 4.
 * Entry k lands in the ladder so that (4 - k) & 3 bytes are copied.
 */
376 and r6, r1, #0x3
377 ldr pc, [pc, r6, lsl #2]
378 b .Lalend
379 .word .Lalend
380 .word .Lal3
381 .word .Lal2
382 .word .Lal1
383 .Lal3: ldrb r6, [r0], #1
384 sub r2, r2, #1
385 strbt r6, [r1], #1
386 .Lal2: ldrb r7, [r0], #1
387 sub r2, r2, #1
388 strbt r7, [r1], #1
389 .Lal1: ldrb r6, [r0], #1
390 sub r2, r2, #1
391 strbt r6, [r1], #1
392 .Lalend:
393
394 /*
395 * If few bytes left, finish slow.
396 */
397 cmp r2, #0x08
398 blt .Lcleanup
399
400 /*
401 * If source is not aligned, finish slow.
402 */
403 ands r3, r0, #0x03
404 bne .Lcleanup
405
406 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */
407 blt .Lcleanup8
408
409 /*
410 * Align destination to cacheline boundary (only r1 is examined).
411 */
/*
 * r1 is word aligned here, so (r1 & 0x1f) is already a multiple of 4 and
 * serves unscaled as the byte offset into the table.  Offset n lands in
 * the ladder so that (32 - n) & 31 bytes are copied, one word at a time.
 */
412 and r6, r1, #0x1f
413 ldr pc, [pc, r6]
414 b .Lcaligned
415 .word .Lcaligned
416 .word .Lcal28
417 .word .Lcal24
418 .word .Lcal20
419 .word .Lcal16
420 .word .Lcal12
421 .word .Lcal8
422 .word .Lcal4
423 .Lcal28:ldr r6, [r0], #4
424 sub r2, r2, #4
425 strt r6, [r1], #4
426 .Lcal24:ldr r7, [r0], #4
427 sub r2, r2, #4
428 strt r7, [r1], #4
429 .Lcal20:ldr r6, [r0], #4
430 sub r2, r2, #4
431 strt r6, [r1], #4
432 .Lcal16:ldr r7, [r0], #4
433 sub r2, r2, #4
434 strt r7, [r1], #4
435 .Lcal12:ldr r6, [r0], #4
436 sub r2, r2, #4
437 strt r6, [r1], #4
438 .Lcal8: ldr r7, [r0], #4
439 sub r2, r2, #4
440 strt r7, [r1], #4
441 .Lcal4: ldr r6, [r0], #4
442 sub r2, r2, #4
443 strt r6, [r1], #4
444
445 /*
446 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
447 * part of the code, and we may have knocked that down by as much
448 * as 0x1c getting aligned).
449 *
450 * This loop basically works out to:
451 * do {
452 * prefetch-next-cacheline(s)
453 * bytes -= 0x20;
454 * copy cacheline
455 * } while (bytes >= 0x40);
456 * bytes -= 0x20;
457 * copy cacheline
458 */
459 .Lcaligned:
460 PREFETCH(r0, 32)
461 PREFETCH(r1, 32)
462
463 sub r2, r2, #0x20
464
465 /* Copy a cacheline */
/*
 * Six kernel words are loaded at once; r6/r7 are stored first so they
 * can be reloaded with words 7 and 8 while r8-r11 are still pending.
 */
466 ldmia r0!, {r6-r11}
467 strt r6, [r1], #4
468 strt r7, [r1], #4
469 ldmia r0!, {r6-r7}
470 strt r8, [r1], #4
471 strt r9, [r1], #4
472 strt r10, [r1], #4
473 strt r11, [r1], #4
474 strt r6, [r1], #4
475 strt r7, [r1], #4
476
477 cmp r2, #0x40
478 bge .Lcaligned
479
480 sub r2, r2, #0x20
481
482 /* Copy a cacheline */
483 ldmia r0!, {r6-r11}
484 strt r6, [r1], #4
485 strt r7, [r1], #4
486 ldmia r0!, {r6-r7}
487 strt r8, [r1], #4
488 strt r9, [r1], #4
489 strt r10, [r1], #4
490 strt r11, [r1], #4
491 strt r6, [r1], #4
492 strt r7, [r1], #4
493
494 cmp r2, #0x08
495 blt .Lprecleanup
496
/* Two words per iteration while at least 8 bytes remain. */
497 .Lcleanup8:
498 ldmia r0!, {r8-r9}
499 sub r2, r2, #8
500 strt r8, [r1], #4
501 strt r9, [r1], #4
502 cmp r2, #8
503 bge .Lcleanup8
504
505 .Lprecleanup:
506 /*
507 * If we're done, bail.
508 */
509 cmp r2, #0
510 beq .Lout
511
/*
 * Byte tail.  Computed jump on (r2 & 3): 1, 2 or 3 copy that many bytes,
 * 0 copies four.  r2 is non-zero on every path that reaches this label.
 * Only the subs in .Lc1 sets flags (strbt leaves them alone), so the bne
 * at .Lcend repeats the ladder until r2 reaches zero.
 */
512 .Lcleanup:
513 and r6, r2, #0x3
514 ldr pc, [pc, r6, lsl #2]
515 b .Lcend
516 .word .Lc4
517 .word .Lc1
518 .word .Lc2
519 .word .Lc3
520 .Lc4: ldrb r6, [r0], #1
521 sub r2, r2, #1
522 strbt r6, [r1], #1
523 .Lc3: ldrb r7, [r0], #1
524 sub r2, r2, #1
525 strbt r7, [r1], #1
526 .Lc2: ldrb r6, [r0], #1
527 sub r2, r2, #1
528 strbt r6, [r1], #1
529 .Lc1: ldrb r7, [r0], #1
530 subs r2, r2, #1
531 strbt r7, [r1], #1
532 .Lcend:
533 bne .Lcleanup
534
/* Success: return 0 after restoring pcb_onfault and r4-r11. */
535 .Lout:
536 mov r0, #0
537
538 str r5, [r4, #PCB_ONFAULT]
539 RESTORE_REGS
540
541 RET
542 END(copyout)
543 #endif /* closes the _ARM_ARCH_5E / generic selection */
544
545 /*
546 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
547 *
548 * Copies a single 8-bit value from src to dest, returning 0 on success,
549 * else EFAULT if a page fault occurred.
550 */
/*
 * In:   r0 = src, r1 = dest (argument order of the prototype above).
 * Uses: r2 = PCB pointer, r3 = handler address, then the byte read,
 *       ip = pcb_onfault value found on entry.  No stack is used.
 *
 * pcb_onfault is pointed at label 1 for the duration of the access and
 * put back on both the normal and the fault path.
 * NOTE(review): nothing in this routine writes EFAULT; presumably the
 * abort handler places the error in r0 before resuming at label 1 --
 * confirm.
 * NOTE(review): the nop triplets presumably keep the probing load apart
 * from the neighbouring stores so a late abort is still taken while the
 * hook is armed -- confirm.
 */
551 ENTRY(badaddr_read_1)
552 GET_PCB(r2)
553 ldr r2, [r2]
554
555 ldr ip, [r2, #PCB_ONFAULT]
556 adr r3, 1f
557 str r3, [r2, #PCB_ONFAULT]
558 nop
559 nop
560 nop
/* The probing access: a plain byte load from src. */
561 ldrb r3, [r0]
562 nop
563 nop
564 nop
565 strb r3, [r1]
566 mov r0, #0 /* No fault */
/* Common exit, and the address installed in pcb_onfault above. */
567 1: str ip, [r2, #PCB_ONFAULT]
568 RET
569 END(badaddr_read_1)
570
571 /*
572 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
573 *
574 * Copies a single 16-bit value from src to dest, returning 0 on success,
575 * else EFAULT if a page fault occurred.
576 */
/*
 * In:   r0 = src, r1 = dest (argument order of the prototype above).
 * Uses: r2 = PCB pointer, r3 = handler address, then the halfword read,
 *       ip = pcb_onfault value found on entry.  No stack is used.
 *
 * pcb_onfault is pointed at label 1 for the duration of the access and
 * put back on both the normal and the fault path.
 * NOTE(review): nothing in this routine writes EFAULT; presumably the
 * abort handler places the error in r0 before resuming at label 1 --
 * confirm.
 * NOTE(review): the nop triplets presumably keep the probing load apart
 * from the neighbouring stores so a late abort is still taken while the
 * hook is armed -- confirm.
 */
577 ENTRY(badaddr_read_2)
578 GET_PCB(r2)
579 ldr r2, [r2]
580
581 ldr ip, [r2, #PCB_ONFAULT]
582 adr r3, 1f
583 str r3, [r2, #PCB_ONFAULT]
584 nop
585 nop
586 nop
/* The probing access: a plain halfword load from src. */
587 ldrh r3, [r0]
588 nop
589 nop
590 nop
591 strh r3, [r1]
592 mov r0, #0 /* No fault */
/* Common exit, and the address installed in pcb_onfault above. */
593 1: str ip, [r2, #PCB_ONFAULT]
594 RET
595 END(badaddr_read_2)
596
597 /*
598 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
599 *
600 * Copies a single 32-bit value from src to dest, returning 0 on success,
601 * else EFAULT if a page fault occurred.
602 */
/*
 * In:   r0 = src, r1 = dest (argument order of the prototype above).
 * Uses: r2 = PCB pointer, r3 = handler address, then the word read,
 *       ip = pcb_onfault value found on entry.  No stack is used.
 *
 * pcb_onfault is pointed at label 1 for the duration of the access and
 * put back on both the normal and the fault path.
 * NOTE(review): nothing in this routine writes EFAULT; presumably the
 * abort handler places the error in r0 before resuming at label 1 --
 * confirm.
 * NOTE(review): the nop triplets presumably keep the probing load apart
 * from the neighbouring stores so a late abort is still taken while the
 * hook is armed -- confirm.
 */
603 ENTRY(badaddr_read_4)
604 GET_PCB(r2)
605 ldr r2, [r2]
606
607 ldr ip, [r2, #PCB_ONFAULT]
608 adr r3, 1f
609 str r3, [r2, #PCB_ONFAULT]
610 nop
611 nop
612 nop
/* The probing access: a plain word load from src. */
613 ldr r3, [r0]
614 nop
615 nop
616 nop
617 str r3, [r1]
618 mov r0, #0 /* No fault */
/* Common exit, and the address installed in pcb_onfault above. */
619 1: str ip, [r2, #PCB_ONFAULT]
620 RET
621 END(badaddr_read_4)
622
Cache object: 8582909d3d65d4a034579f065c3e68ec
|