1 /* $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $ */
2
3 /*-
4 * Copyright (c) 2002 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Allen Briggs for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38
39 #include "assym.s"
40
41 #include <machine/asm.h>
42
43 __FBSDID("$FreeBSD: releng/6.2/sys/arm/arm/bcopyinout.S 146604 2005-05-24 23:55:09Z cognet $");
44 #ifdef __XSCALE__
45 #include <arm/arm/bcopyinout_xscale.S>
46 #else
47
	.text
	.align	0

#ifdef MULTIPROCESSOR
/* Table of per-CPU cpu_info pointers, indexed by cpu_number(). */
.Lcpu_info:
	.word	_C_LABEL(cpu_info)
#else
/* Address of the current thread's PCB pointer (&__pcpu.pc_curpcb). */
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB
#endif

/* The copy loops below use r4-r11 as scratch; callers expect them preserved. */
#define SAVE_REGS	stmfd sp!, {r4-r11}
#define RESTORE_REGS	ldmfd sp!, {r4-r11}

#if defined(__XSCALE__)
/* HELLOCPP lets the macro below emit a literal '#' for the pld immediate. */
#define HELLOCPP #
#define PREFETCH(rx,o)	pld	[ rx , HELLOCPP (o) ]
#else
/* No preload instruction on non-XScale cores; PREFETCH expands to nothing. */
#define PREFETCH(rx,o)
#endif
68
69 /*
70 * r0 = user space address
71 * r1 = kernel space address
72 * r2 = length
73 *
74 * Copies bytes from user space to kernel space
75 *
76 * We save/restore r4-r11:
77 * r4-r11 are scratch
78 */
/*
 * int copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * In:  r0 = user space source address
 *      r1 = kernel space destination address
 *      r2 = length in bytes
 * Out: r0 = 0 on success, EFAULT if a fault occurred during the copy
 *
 * User reads use ldrbt/ldrt so the access is performed with user
 * permissions.  A fault during the copy vectors to .Lcopyfault via
 * pcb_onfault.  r4 = current PCB, r5 = saved previous pcb_onfault;
 * r4-r11 are otherwise scratch (saved/restored by SAVE_REGS).
 */
ENTRY(copyin)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	SAVE_REGS
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r4, .Lcpu_info
	ldr	r4, [r4, r0, lsl #2]
	ldr	r4, [r4, #CI_CURPCB]
	ldmfd	sp!, {r0-r2, r14}
#else
	ldr	r4, .Lcurpcb
	ldr	r4, [r4]
#endif

	/* Save the previous fault handler in r5 and install .Lcopyfault. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align destination to word boundary.
	 * Computed dispatch: pc reads as the address of the first .word,
	 * so slot r6 (= dest & 3) selects how many leading bytes to copy.
	 * The 'b .Lialend' filling the branch shadow is never executed.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win. If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 *
	 * r6 = dest & 0x1f is already a multiple of 4 (dest was word
	 * aligned above), so it is used directly as a byte offset into
	 * the jump table; slot k copies (32 - k*4) leading words.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 *	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Licaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

	/* Copy the remaining 8-byte groups. */
.Licleanup8:
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.
	 * (Fix: this previously branched to .Lout, falling into copyout's
	 * epilogue, which happens to be identical; exit through copyin's
	 * own .Liout so the routine is self-contained.)
	 */
	cmp	r2, #0
	beq	.Liout

	/*
	 * Byte-copy tail: dispatch on (len & 3) -- slot 0 handles a
	 * remaining multiple of 4 -- and loop until len reaches zero.
	 */
.Licleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Licend:
	bne	.Licleanup

.Liout:
	mov	r0, #0

	/* Restore the caller's fault handler and saved registers. */
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET

/*
 * Fault handler installed in pcb_onfault by both copyin and copyout:
 * restore the previous handler (r5) and return EFAULT.
 */
.Lcopyfault:
	mov	r0, #14		/* EFAULT */
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
291
292 /*
293 * r0 = kernel space address
294 * r1 = user space address
295 * r2 = length
296 *
297 * Copies bytes from kernel space to user space
298 *
299 * We save/restore r4-r11:
300 * r4-r11 are scratch
301 */
302
/*
 * int copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * In:  r0 = kernel space source address
 *      r1 = user space destination address
 *      r2 = length in bytes
 * Out: r0 = 0 on success, EFAULT if a fault occurred during the copy
 *
 * User writes use strbt/strt so the access is performed with user
 * permissions.  A fault during the copy vectors to .Lcopyfault (defined
 * with copyin above) via pcb_onfault.  r4 = current PCB, r5 = saved
 * previous pcb_onfault; r4-r11 are otherwise scratch.
 */
ENTRY(copyout)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	SAVE_REGS
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r4, .Lcpu_info
	ldr	r4, [r4, r0, lsl #2]
	ldr	r4, [r4, #CI_CURPCB]
	ldmfd	sp!, {r0-r2, r14}
#else
	ldr	r4, .Lcurpcb
	ldr	r4, [r4]
#endif

	/* Save the previous fault handler in r5 and install .Lcopyfault. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * Align destination to word boundary.
	 * Computed dispatch: pc reads as the address of the first .word,
	 * so slot r6 (= dest & 3) selects how many leading bytes to copy.
	 * The 'b .Lalend' filling the branch shadow is never executed.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Lcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lcleanup8

	/*
	 * Align source & destination to cacheline boundary.
	 * r6 = dest & 0x1f is already a multiple of 4 (dest was word
	 * aligned above), so it is used directly as a byte offset into
	 * the jump table; slot k copies (32 - k*4) leading words.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 *	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Lcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline: 6-word burst load, then reload r6-r7 for the tail */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline (final unrolled iteration) */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x08
	blt	.Lprecleanup

	/* Copy the remaining 8-byte groups. */
.Lcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lout

	/*
	 * Byte-copy tail: dispatch on (len & 3) -- slot 0 handles a
	 * remaining multiple of 4 -- and loop until len reaches zero.
	 */
.Lcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1
.Lcend:
	bne	.Lcleanup

.Lout:
	mov	r0, #0

	/* Restore the caller's fault handler and saved registers. */
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
505 #endif
506
507 /*
508 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
509 *
510 * Copies a single 8-bit value from src to dest, returning 0 on success,
511 * else EFAULT if a page fault occurred.
512 */
/*
 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 *
 * Probe: copies a single 8-bit value from src to dest, returning 0 on
 * success, else EFAULT if a page fault occurred.  r2 = current PCB,
 * ip = saved previous pcb_onfault.
 */
ENTRY(badaddr_read_1)
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r1, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r2, .Lcpu_info
	ldr	r2, [r2, r0, lsl #2]
	ldr	r2, [r2, #CI_CURPCB]
	ldmfd	sp!, {r0-r1, r14}
#else
	ldr	r2, .Lcurpcb
	ldr	r2, [r2]
#endif
	/* Save the previous fault handler in ip; install 1f as handler. */
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	/* NOTE(review): the nops appear to pad the fault window around the
	 * probing access -- preserved as-is; do not remove. */
	nop
	nop
	nop
	ldrb	r3, [r0]	/* the probe; may fault */
	nop
	nop
	nop
	strb	r3, [r1]
	mov	r0, #0		/* No fault */
	/* On a fault, control resumes at 1: with the error in r0 (skipping
	 * the 'mov r0, #0' above), per the EFAULT contract in the header. */
1:	str	ip, [r2, #PCB_ONFAULT]	/* restore previous handler */
	RET
540
541 /*
542 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
543 *
544 * Copies a single 16-bit value from src to dest, returning 0 on success,
545 * else EFAULT if a page fault occurred.
546 */
/*
 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 *
 * Probe: copies a single 16-bit value from src to dest, returning 0 on
 * success, else EFAULT if a page fault occurred.  r2 = current PCB,
 * ip = saved previous pcb_onfault.
 */
ENTRY(badaddr_read_2)
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r1, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r2, .Lcpu_info
	ldr	r2, [r2, r0, lsl #2]
	ldr	r2, [r2, #CI_CURPCB]
	ldmfd	sp!, {r0-r1, r14}
#else
	ldr	r2, .Lcurpcb
	ldr	r2, [r2]
#endif
	/* Save the previous fault handler in ip; install 1f as handler. */
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	/* NOTE(review): the nops appear to pad the fault window around the
	 * probing access -- preserved as-is; do not remove. */
	nop
	nop
	nop
	ldrh	r3, [r0]	/* the probe; may fault */
	nop
	nop
	nop
	strh	r3, [r1]
	mov	r0, #0		/* No fault */
	/* On a fault, control resumes at 1: with the error in r0 (skipping
	 * the 'mov r0, #0' above), per the EFAULT contract in the header. */
1:	str	ip, [r2, #PCB_ONFAULT]	/* restore previous handler */
	RET
574
575 /*
576 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
577 *
578 * Copies a single 32-bit value from src to dest, returning 0 on success,
579 * else EFAULT if a page fault occurred.
580 */
/*
 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 *
 * Probe: copies a single 32-bit value from src to dest, returning 0 on
 * success, else EFAULT if a page fault occurred.  r2 = current PCB,
 * ip = saved previous pcb_onfault.
 */
ENTRY(badaddr_read_4)
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r1, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r2, .Lcpu_info
	ldr	r2, [r2, r0, lsl #2]
	ldr	r2, [r2, #CI_CURPCB]
	ldmfd	sp!, {r0-r1, r14}
#else
	ldr	r2, .Lcurpcb
	ldr	r2, [r2]
#endif
	/* Save the previous fault handler in ip; install 1f as handler. */
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	/* NOTE(review): the nops appear to pad the fault window around the
	 * probing access -- preserved as-is; do not remove. */
	nop
	nop
	nop
	ldr	r3, [r0]	/* the probe; may fault */
	nop
	nop
	nop
	str	r3, [r1]
	mov	r0, #0		/* No fault */
	/* On a fault, control resumes at 1: with the error in r0 (skipping
	 * the 'mov r0, #0' above), per the EFAULT contract in the header. */
1:	str	ip, [r2, #PCB_ONFAULT]	/* restore previous handler */
	RET
608
/* Cache object: 50bc1327e6e2bcbe850b0fe4c4d3aeb3 */