FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/blockio.S
1 /* $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $ */
2
3 /*-
4 * Copyright (c) 2001 Ben Harris.
5 * Copyright (c) 1994 Mark Brinicombe.
6 * Copyright (c) 1994 Brini.
7 * All rights reserved.
8 *
9 * This code is derived from software written for Brini by Mark Brinicombe
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by Brini.
22 * 4. The name of the company nor the name of the author may be used to
23 * endorse or promote products derived from this software without specific
24 * prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
27 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
30 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * RiscBSD kernel project
39 *
40 * blockio.S
41 *
42 * optimised block read/write from/to IO routines.
43 *
44 * Created : 08/10/94
45 * Modified : 22/01/99 -- R.Earnshaw
46 * Faster, and small tweaks for StrongARM
47 */
48
49 #include <machine/asm.h>
50
51 __FBSDID("$FreeBSD: releng/10.0/sys/arm/arm/blockio.S 248361 2013-03-16 02:48:49Z andrew $");
52
53 /*
54 * read_multi_1: read bytes from a single I/O address into a block of memory.
55 * Every byte is loaded from [r0] (r0 is not advanced); r3 and r12 are scratch.
56 * r0 = address to read from (IO)
57 * r1 = address to write to (memory), post-incremented
58 * r2 = length in bytes
59 */
60
61 /* This code will look very familiar if you've read _memcpy(). */
62 ENTRY(read_multi_1)
63 mov ip, sp /* ip = sp at entry */
64 stmfd sp!, {fp, ip, lr, pc} /* build the frame record; the entry sp is saved via ip */
65 sub fp, ip, #4 /* fp = entry sp - 4, the base used by the ldmdb returns below */
66 subs r2, r2, #4 /* r2 = length - 4 */
67 blt .Lrm1_l4 /* less than 4 bytes */
68 ands r12, r1, #3 /* r12 = destination offset within its word */
69 beq .Lrm1_main /* aligned destination */
70 rsb r12, r12, #4 /* r12 = bytes needed to reach word alignment (1..3) */
71 cmp r12, #2 /* flags select 1 (lt), 2 (eq) or 3 (gt) byte copies */
72 ldrb r3, [r0]
73 strb r3, [r1], #1
74 ldrgeb r3, [r0]
75 strgeb r3, [r1], #1
76 ldrgtb r3, [r0]
77 strgtb r3, [r1], #1
78 subs r2, r2, r12 /* account for the alignment bytes just copied */
79 blt .Lrm1_l4 /* fewer than 4 bytes remain */
80 .Lrm1_main:
81 .Lrm1loop: /* four byte reads are packed into one word store */
82 ldrb r3, [r0] /* 1st byte -> bits 7:0 */
83 ldrb r12, [r0]
84 orr r3, r3, r12, lsl #8 /* 2nd byte -> bits 15:8 */
85 ldrb r12, [r0]
86 orr r3, r3, r12, lsl #16 /* 3rd byte -> bits 23:16 */
87 ldrb r12, [r0]
88 orr r3, r3, r12, lsl #24 /* 4th byte -> bits 31:24 */
89 str r3, [r1], #4
90 subs r2, r2, #4
91 bge .Lrm1loop
92 .Lrm1_l4:
93 adds r2, r2, #4 /* r2 = bytes still to read (undo the -4 bias) */
94 ldmeqdb fp, {fp, sp, pc} /* nothing left: restore fp and sp, return via saved lr */
96 cmp r2, #2 /* trailing bytes: same lt/eq/gt selection as above */
97 ldrb r3, [r0]
98 strb r3, [r1], #1
99 ldrgeb r3, [r0]
100 strgeb r3, [r1], #1
101 ldrgtb r3, [r0]
102 strgtb r3, [r1], #1
103 ldmdb fp, {fp, sp, pc} /* restore fp and sp, return via saved lr */
104 END(read_multi_1)
105
106 /*
107 * write_multi_1: write bytes from a block of memory to a single I/O address.
108 * Every byte is stored to [r0] (r0 is not advanced); r3 and r12 are scratch.
109 * r0 = address to write to (IO)
110 * r1 = address to read from (memory), post-incremented
111 * r2 = length in bytes
112 */
113
114 /* This code will look very familiar if you've read _memcpy(). */
115 ENTRY(write_multi_1)
116 mov ip, sp /* ip = sp at entry */
117 stmfd sp!, {fp, ip, lr, pc} /* build the frame record; the entry sp is saved via ip */
118 sub fp, ip, #4 /* fp = entry sp - 4, the base used by the ldmdb returns below */
119 subs r2, r2, #4 /* r2 = length - 4 */
120 blt .Lwm1_l4 /* less than 4 bytes */
121 ands r12, r1, #3 /* r12 = source offset within its word */
122 beq .Lwm1_main /* aligned source */
123 rsb r12, r12, #4 /* r12 = bytes needed to reach word alignment (1..3) */
124 cmp r12, #2 /* flags select 1 (lt), 2 (eq) or 3 (gt) byte copies */
125 ldrb r3, [r1], #1
126 strb r3, [r0]
127 ldrgeb r3, [r1], #1
128 strgeb r3, [r0]
129 ldrgtb r3, [r1], #1
130 strgtb r3, [r0]
131 subs r2, r2, r12 /* account for the alignment bytes just copied */
132 blt .Lwm1_l4 /* fewer than 4 bytes remain */
133 .Lwm1_main:
134 .Lwm1loop: /* one word load feeds four byte writes */
135 ldr r3, [r1], #4
136 strb r3, [r0] /* bits 7:0 */
137 mov r3, r3, lsr #8
138 strb r3, [r0] /* bits 15:8 */
139 mov r3, r3, lsr #8
140 strb r3, [r0] /* bits 23:16 */
141 mov r3, r3, lsr #8
142 strb r3, [r0] /* bits 31:24 */
143 subs r2, r2, #4
144 bge .Lwm1loop
145 .Lwm1_l4:
146 adds r2, r2, #4 /* r2 = bytes still to write (undo the -4 bias) */
147 ldmeqdb fp, {fp, sp, pc} /* nothing left: restore fp and sp, return via saved lr */
148 cmp r2, #2 /* trailing bytes: same lt/eq/gt selection as above */
149 ldrb r3, [r1], #1
150 strb r3, [r0]
151 ldrgeb r3, [r1], #1
152 strgeb r3, [r0]
153 ldrgtb r3, [r1], #1
154 strgtb r3, [r0]
155 ldmdb fp, {fp, sp, pc} /* restore fp and sp, return via saved lr */
156 END(write_multi_1)
157
158 /*
159 * Reads short ints (16 bits) from an I/O address into a block of memory
160 * The port is read at [r0] (and [r0, #2] on the fast path); r0 is not advanced. r3 and ip are scratch.
161 * r0 = address to read from (IO)
162 * r1 = address to write to (memory)
163 * r2 = length (count of 16-bit items; <= 0 returns immediately)
164 */
165
166 ENTRY(insw)
167 /* Make sure that we have a positive length */
168 cmp r2, #0x00000000
169 movle pc, lr
170
171 /* If the destination address is word aligned and the count is even, do it fast */
172
173 tst r2, #0x00000001
174 tsteq r1, #0x00000003
175 beq .Lfastinsw
176
177 /* Non aligned insw: one item per iteration, stored as two bytes */
178
179 .Linswloop:
180 ldr r3, [r0]
181 subs r2, r2, #0x00000001 /* Loop test in load delay slot */
182 strb r3, [r1], #0x0001 /* bits 7:0 of the word just read */
183 mov r3, r3, lsr #8
184 strb r3, [r1], #0x0001 /* bits 15:8 of the word just read */
185 bgt .Linswloop /* flags are still those of the subs above */
186
187 RET
188
189 /* Word aligned insw: two items per iteration, stored as one word */
190
191 .Lfastinsw:
192
193 .Lfastinswloop:
194 ldr r3, [r0, #0x0002] /* take advantage of nonaligned
195 * word accesses */
196 ldr ip, [r0] /* second read of the port */
197 mov r3, r3, lsr #16 /* Put the two shorts together */
198 orr r3, r3, ip, lsl #16 /* low half = first read's bits 31:16, high half = second read's bits 15:0 */
199 str r3, [r1], #0x0004 /* Store */
200 subs r2, r2, #0x00000002 /* Next */
201 bgt .Lfastinswloop
202
203 RET
204 END(insw)
205
206 /*
207 * Writes short ints (16 bits) from a block of memory to an I/O address
208 * All stores go to [r0] (r0 is not advanced); r3 and ip are scratch.
209 * r0 = address to write to (IO)
210 * r1 = address to read from (memory)
211 * r2 = length (count of 16-bit items; <= 0 returns immediately)
212 */
213
214 ENTRY(outsw)
215 /* Make sure that we have a positive length */
216 cmp r2, #0x00000000
217 movle pc, lr
218
219 /* If the source address is word aligned and the count is even, do it fast */
220
221 tst r2, #0x00000001
222 tsteq r1, #0x00000003
223 beq .Lfastoutsw
224
225 /* Non aligned outsw: one item per iteration, fetched as two bytes */
226
227 .Loutswloop:
228 ldrb r3, [r1], #0x0001 /* first byte of the item */
229 ldrb ip, [r1], #0x0001 /* second byte of the item */
230 subs r2, r2, #0x00000001 /* Loop test in load delay slot */
231 orr r3, r3, ip, lsl #8 /* r3 = (second byte << 8) | first byte */
232 orr r3, r3, r3, lsl #16 /* copy the 16-bit value into the upper halfword too */
233 str r3, [r0]
234 bgt .Loutswloop /* flags are still those of the subs above */
235
236 RET
237
238 /* Word aligned outsw: one source word yields two stores */
239
240 .Lfastoutsw:
241
242 .Lfastoutswloop:
243 ldr r3, [r1], #0x0004 /* r3 = (H)(L) */
244 subs r2, r2, #0x00000002 /* Loop test in load delay slot */
245
246 eor ip, r3, r3, lsr #16 /* ip = (H)(H^L) */
247 eor r3, r3, ip, lsl #16 /* r3 = (H^H^L)(L) = (L)(L) */
248 eor ip, ip, r3, lsr #16 /* ip = (H)(H^L^L) = (H)(H) */
249
250 str r3, [r0] /* low halfword of the source word goes out first */
251 str ip, [r0] /* then the high halfword */
252
253 /* mov ip, r3, lsl #16
254 * orr ip, ip, ip, lsr #16
255 * str ip, [r0]
256 *
257 * mov ip, r3, lsr #16
258 * orr ip, ip, ip, lsl #16
259 * str ip, [r0]
260 */
261
262 bgt .Lfastoutswloop /* flags are still those of the subs above */
263
264 RET
265 END(outsw)
266
267 /*
268 * reads short ints (16 bits) from an I/O address into a block of memory
269 * with a length guaranteed to be a multiple of 16 bytes
270 * with a word aligned destination address
271 * (if either condition does not hold, the code branches to insw instead)
272 * r0 = address to read from (IO)
273 * r1 = address to write to (memory)
274 * r2 = length (count of 16-bit items; the fast path needs a multiple of 8)
275 */
276
277 ENTRY(insw16)
278 /* Make sure that we have a positive length */
279 cmp r2, #0x00000000
280 movle pc, lr
281
282 /* If the destination address is word aligned and the size suitably
283 aligned, do it fast */
284
285 tst r2, #0x00000007 /* count must be a multiple of 8 */
286 tsteq r1, #0x00000003 /* and r1 must be word aligned */
287
288 bne _C_LABEL(insw) /* otherwise branch (not call) to insw; lr is untouched */
289
290 /* Word aligned insw */
291
292 stmfd sp!, {r4,r5,lr} /* r4, r5 and lr are used as scratch below */
293
294 .Linsw16loop: /* 8 items per iteration: four words built, then stored together */
295 ldr r3, [r0, #0x0002] /* take advantage of nonaligned
296 * word accesses */
297 ldr lr, [r0]
298 mov r3, r3, lsr #16 /* Put the two shorts together */
299 orr r3, r3, lr, lsl #16 /* r3 = 1st output word */
300
301 ldr r4, [r0, #0x0002] /* take advantage of nonaligned
302 * word accesses */
303 ldr lr, [r0]
304 mov r4, r4, lsr #16 /* Put the two shorts together */
305 orr r4, r4, lr, lsl #16 /* r4 = 2nd output word */
306
307 ldr r5, [r0, #0x0002] /* take advantage of nonaligned
308 * word accesses */
309 ldr lr, [r0]
310 mov r5, r5, lsr #16 /* Put the two shorts together */
311 orr r5, r5, lr, lsl #16 /* r5 = 3rd output word */
312
313 ldr ip, [r0, #0x0002] /* take advantage of nonaligned
314 * word accesses */
315 ldr lr, [r0]
316 mov ip, ip, lsr #16 /* Put the two shorts together */
317 orr ip, ip, lr, lsl #16 /* ip = 4th output word */
318
319 stmia r1!, {r3-r5,ip} /* store the four words and advance r1 by 16 bytes */
320 subs r2, r2, #0x00000008 /* Next */
321 bgt .Linsw16loop
322
323 ldmfd sp!, {r4,r5,pc} /* Restore regs and go home */
324 END(insw16)
325
326 /*
327 * Writes short ints (16 bits) from a block of memory to an I/O address
328 * The fast path needs a word aligned source and a count that is a multiple of 8; otherwise the code branches to outsw.
329 * r0 = address to write to (IO)
330 * r1 = address to read from (memory)
331 * r2 = length (count of 16-bit items)
332 */
333
334 ENTRY(outsw16)
335 /* Make sure that we have a positive length */
336 cmp r2, #0x00000000
337 movle pc, lr
338
339 /* If the source address is word aligned and the size suitably
340 aligned, do it fast */
341
342 tst r2, #0x00000007 /* count must be a multiple of 8 */
343 tsteq r1, #0x00000003 /* and r1 must be word aligned */
344
345 bne _C_LABEL(outsw) /* otherwise branch (not call) to outsw; lr is untouched */
346
347 /* Word aligned outsw */
348
349 stmfd sp!, {r4,r5,lr} /* r4, r5 and lr are used as scratch below */
350
351 .Loutsw16loop: /* 8 items per iteration: four source words, two stores each */
352 ldmia r1!, {r4,r5,ip,lr}
353
354 eor r3, r4, r4, lsl #16 /* r3 = (A^B)(B) */
355 eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
356 eor r3, r3, r4, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
357 str r3, [r0] /* low halfword (B) goes out first */
358 str r4, [r0] /* then the high halfword (A) */
359
360 /* mov r3, r4, lsl #16
361 * orr r3, r3, r3, lsr #16
362 * str r3, [r0]
363 *
364 * mov r3, r4, lsr #16
365 * orr r3, r3, r3, lsl #16
366 * str r3, [r0]
367 */
368
369 eor r3, r5, r5, lsl #16 /* r3 = (A^B)(B) */
370 eor r5, r5, r3, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
371 eor r3, r3, r5, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
372 str r3, [r0]
373 str r5, [r0]
374
375 eor r3, ip, ip, lsl #16 /* r3 = (A^B)(B) */
376 eor ip, ip, r3, lsr #16 /* ip = (A)(B^A^B) = (A)(A) */
377 eor r3, r3, ip, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
378 str r3, [r0]
379 str ip, [r0]
380
381 eor r3, lr, lr, lsl #16 /* r3 = (A^B)(B) */
382 eor lr, lr, r3, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
383 eor r3, r3, lr, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
384 str r3, [r0]
385 str lr, [r0]
386
387 subs r2, r2, #0x00000008
388 bgt .Loutsw16loop
389
390 ldmfd sp!, {r4,r5,pc} /* and go home */
391 END(outsw16)
392
393 /*
394 * reads short ints (16 bits) from an I/O address into a block of memory
395 * The I/O address is assumed to be mapped multiple times in a block of
396 * 8 words.
397 * The destination address should be word aligned.
398 * (if it is not, the code branches to insw instead)
399 * r0 = address to read from (IO); read with ldmia, never advanced
400 * r1 = address to write to (memory)
401 * r2 = length (count of 16-bit items; <= 0 returns immediately)
402 */
403
404 ENTRY(inswm8)
405 /* Make sure that we have a positive length */
406 cmp r2, #0x00000000
407 movle pc, lr
408
409 /* If the destination address is not word aligned, let insw do the
410 work; otherwise do it fast */
411
412 tst r1, #0x00000003
413
414 bne _C_LABEL(insw) /* branch (not call) to insw; lr is untouched */
415
416 /* Word aligned insw */
417
418 stmfd sp!, {r4-r9,lr} /* r4-r9 and lr are used as scratch below */
419
420 mov lr, #0xff000000
421 orr lr, lr, #0x00ff0000 /* lr = 0xffff0000, mask for the upper halfword */
422
423 .Linswm8_loop8: /* 8 items per iteration while at least 8 remain */
424 cmp r2, #8
425 bcc .Linswm8_l8 /* fewer than 8 left */
426
427 ldmia r0, {r3-r9,ip} /* eight word reads from r0 .. r0+28 */
428
429 bic r3, r3, lr /* keep bits 15:0 of the 1st word */
430 orr r3, r3, r4, lsl #16 /* 2nd word's bits 15:0 become the high half */
431 bic r5, r5, lr
432 orr r4, r5, r6, lsl #16 /* r4 = words 3 and 4 packed */
433 bic r7, r7, lr
434 orr r5, r7, r8, lsl #16 /* r5 = words 5 and 6 packed */
435 bic r9, r9, lr
436 orr r6, r9, ip, lsl #16 /* r6 = words 7 and 8 packed */
437
438 stmia r1!, {r3-r6}
439
440 subs r2, r2, #0x00000008 /* Next */
441 bne .Linswm8_loop8
442 beq .Linswm8_l1 /* always taken here: nothing left */
443
444 .Linswm8_l8: /* 4 items, if at least 4 remain */
445 cmp r2, #4
446 bcc .Linswm8_l4
447
448 ldmia r0, {r3-r6}
449
450 bic r3, r3, lr
451 orr r3, r3, r4, lsl #16
452 bic r5, r5, lr
453 orr r4, r5, r6, lsl #16
454
455 stmia r1!, {r3-r4}
456
457 subs r2, r2, #0x00000004
458 beq .Linswm8_l1
459
460 .Linswm8_l4: /* 2 items, if at least 2 remain */
461 cmp r2, #2
462 bcc .Linswm8_l2
463
464 ldmia r0, {r3-r4}
465
466 bic r3, r3, lr
467 orr r3, r3, r4, lsl #16
468 str r3, [r1], #0x0004
469
470 subs r2, r2, #0x00000002
471 beq .Linswm8_l1
472
473 .Linswm8_l2: /* last single item, stored as two bytes */
474 cmp r2, #1
475 bcc .Linswm8_l1
476
477 ldr r3, [r0]
478 subs r2, r2, #0x00000001 /* Test in load delay slot */
479 /* XXX, why don't we use result? (by the checks above r2 is 1 here, so nothing remains afterwards) */
480
481 strb r3, [r1], #0x0001 /* bits 7:0 */
482 mov r3, r3, lsr #8
483 strb r3, [r1], #0x0001 /* bits 15:8 */
484
485
486 .Linswm8_l1:
487 ldmfd sp!, {r4-r9,pc} /* And go home */
488 END(inswm8)
489
490 /*
491 * write short ints (16 bits) to an I/O address from a block of memory
492 * The I/O address is assumed to be mapped multiple times in a block of
493 * 8 words.
494 * The source address should be word aligned (if it is not, the code branches to outsw instead).
495 *
496 * r0 = address to write to (IO); written with stmia, never advanced
497 * r1 = address to read from (memory)
498 * r2 = length (count of 16-bit items; <= 0 returns immediately)
499 */
500
501 ENTRY(outswm8)
502 /* Make sure that we have a positive length */
503 cmp r2, #0x00000000
504 movle pc, lr
505
506 /* If the source address is not word aligned, let outsw do the
507 work; otherwise do it fast */
508
509 tst r1, #0x00000003
510
511 bne _C_LABEL(outsw) /* branch (not call) to outsw; lr is untouched */
512
513 /* Word aligned outsw */
514
515 stmfd sp!, {r4-r8,lr} /* r4-r8 and lr are used as scratch below */
516
517 .Loutswm8_loop8: /* 8 items per iteration while at least 8 remain */
518 cmp r2, #8
519 bcc .Loutswm8_l8 /* fewer than 8 left */
520
521 ldmia r1!, {r3,r5,r7,ip} /* four source words, each (A)(B) */
522
523 eor r4, r3, r3, lsr #16 /* r4 = (A)(A^B) */
524 eor r3, r3, r4, lsl #16 /* r3 = (A^A^B)(B) = (B)(B) */
525 eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
526
527 eor r6, r5, r5, lsr #16 /* r6 = (A)(A^B) */
528 eor r5, r5, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
529 eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
530
531 eor r8, r7, r7, lsr #16 /* r8 = (A)(A^B) */
532 eor r7, r7, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
533 eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
534
535 eor lr, ip, ip, lsr #16 /* lr = (A)(A^B) */
536 eor ip, ip, lr, lsl #16 /* ip = (A^A^B)(B) = (B)(B) */
537 eor lr, lr, ip, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
538
539 stmia r0, {r3-r8,ip,lr} /* eight word writes to r0 .. r0+28, each pair (B)(B) then (A)(A) */
540
541 subs r2, r2, #0x00000008 /* Next */
542 bne .Loutswm8_loop8
543 beq .Loutswm8_l1 /* always taken here: nothing left */
544
545 .Loutswm8_l8: /* 4 items, if at least 4 remain */
546 cmp r2, #4
547 bcc .Loutswm8_l4
548
549 ldmia r1!, {r3-r4}
550
551 eor r6, r3, r3, lsr #16 /* r6 = (A)(A^B) */
552 eor r5, r3, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
553 eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
554
555 eor r8, r4, r4, lsr #16 /* r8 = (A)(A^B) */
556 eor r7, r4, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
557 eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
558
559 stmia r0, {r5-r8}
560
561 subs r2, r2, #0x00000004
562 beq .Loutswm8_l1
563
564 .Loutswm8_l4: /* 2 items, if at least 2 remain */
565 cmp r2, #2
566 bcc .Loutswm8_l2
567
568 ldr r3, [r1], #0x0004 /* r3 = (A)(B) */
569 subs r2, r2, #0x00000002 /* Done test in Load delay slot */
570
571 eor r5, r3, r3, lsr #16 /* r5 = (A)(A^B)*/
572 eor r4, r3, r5, lsl #16 /* r4 = (A^A^B)(B) = (B)(B) */
573 eor r5, r5, r4, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
574
575 stmia r0, {r4, r5}
576
577 beq .Loutswm8_l1 /* flags are still those of the subs above */
578
579 .Loutswm8_l2: /* last single item, fetched as two bytes */
580 cmp r2, #1
581 bcc .Loutswm8_l1
582
583 ldrb r3, [r1], #0x0001
584 ldrb r4, [r1], #0x0001
585 subs r2, r2, #0x00000001 /* Done test in load delay slot */
586 /* XXX This test isn't used? (the code falls straight through to the return below) */
587 orr r3, r3, r4, lsl #8 /* r3 = (second byte << 8) | first byte */
588 orr r3, r3, r3, lsl #16 /* copy the 16-bit value into the upper halfword too */
589 str r3, [r0]
590
591 .Loutswm8_l1:
592 ldmfd sp!, {r4-r8,pc} /* And go home */
593 END(outswm8)
594
Cache object: bbc76ad579759c4b17a4b900791813bb
|