FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/blockio.S
1 /* $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $ */
2
3 /*-
4 * Copyright (c) 2001 Ben Harris.
5 * Copyright (c) 1994 Mark Brinicombe.
6 * Copyright (c) 1994 Brini.
7 * All rights reserved.
8 *
9 * This code is derived from software written for Brini by Mark Brinicombe
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by Brini.
22 * 4. The name of the company nor the name of the author may be used to
23 * endorse or promote products derived from this software without specific
24 * prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
27 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
30 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * RiscBSD kernel project
39 *
40 * blockio.S
41 *
42 * optimised block read/write from/to IO routines.
43 *
44 * Created : 08/10/94
45 * Modified : 22/01/99 -- R.Earnshaw
46 * Faster, and small tweaks for StrongARM
47 */
48
49 #include <machine/asm.h>
50
51 __FBSDID("$FreeBSD$");
52
53 /*
54 * Read bytes from an I/O address into a block of memory
55 *
56 * r0 = address to read from (IO), never advanced by this routine
57 * r1 = address to write to (memory)
58 * r2 = length in bytes
59 */
60
61 /* This code will look very familiar if you've read _memcpy(). */
62 ENTRY(read_multi_1)
63 mov ip, sp
64 stmfd sp!, {fp, ip, lr, pc} /* push a four word stack frame */
65 sub fp, ip, #4 /* fp = top word of that frame */
66 subs r2, r2, #4 /* r2 = length - 4 */
67 blt .Lrm1_l4 /* less than 4 bytes */
68 ands r12, r1, #3
69 beq .Lrm1_main /* aligned destination */
70 rsb r12, r12, #4 /* r12 = 1..3 bytes needed to align r1 */
71 cmp r12, #2
72 ldrb r3, [r0] /* always copy the first byte */
73 strb r3, [r1], #1
74 ldrgeb r3, [r0] /* second byte when r12 >= 2 */
75 strgeb r3, [r1], #1
76 ldrgtb r3, [r0] /* third byte when r12 == 3 */
77 strgtb r3, [r1], #1
78 subs r2, r2, r12 /* account for the alignment bytes */
79 blt .Lrm1_l4
80 .Lrm1_main:
81 .Lrm1loop:
82 ldrb r3, [r0] /* 1st byte read -> bits 7..0 */
83 ldrb r12, [r0]
84 orr r3, r3, r12, lsl #8 /* 2nd byte read -> bits 15..8 */
85 ldrb r12, [r0]
86 orr r3, r3, r12, lsl #16 /* 3rd byte read -> bits 23..16 */
87 ldrb r12, [r0]
88 orr r3, r3, r12, lsl #24 /* 4th byte read -> bits 31..24 */
89 str r3, [r1], #4 /* one word store per four reads */
90 subs r2, r2, #4
91 bge .Lrm1loop
92 .Lrm1_l4:
93 adds r2, r2, #4 /* r2 = length again */
94 ldmeqdb fp, {fp, sp, pc} /* nothing left, unwind and return */
96 cmp r2, #2 /* 1..3 trailing bytes remain */
97 ldrb r3, [r0] /* always copy the first byte */
98 strb r3, [r1], #1
99 ldrgeb r3, [r0] /* second byte when r2 >= 2 */
100 strgeb r3, [r1], #1
101 ldrgtb r3, [r0] /* third byte when r2 == 3 */
102 strgtb r3, [r1], #1
103 ldmdb fp, {fp, sp, pc} /* unwind the frame and return */
104
105 /*
106 * Write bytes to an I/O address from a block of memory
107 *
108 * r0 = address to write to (IO), never advanced by this routine
109 * r1 = address to read from (memory)
110 * r2 = length in bytes
111 */
112
113 /* This code will look very familiar if you've read _memcpy(). */
114 ENTRY(write_multi_1)
115 mov ip, sp
116 stmfd sp!, {fp, ip, lr, pc} /* push a four word stack frame */
117 sub fp, ip, #4 /* fp = top word of that frame */
118 subs r2, r2, #4 /* r2 = length - 4 */
119 blt .Lwm1_l4 /* less than 4 bytes */
120 ands r12, r1, #3
121 beq .Lwm1_main /* aligned source */
122 rsb r12, r12, #4 /* r12 = 1..3 bytes needed to align r1 */
123 cmp r12, #2
124 ldrb r3, [r1], #1 /* always copy the first byte */
125 strb r3, [r0]
126 ldrgeb r3, [r1], #1 /* second byte when r12 >= 2 */
127 strgeb r3, [r0]
128 ldrgtb r3, [r1], #1 /* third byte when r12 == 3 */
129 strgtb r3, [r0]
130 subs r2, r2, r12 /* account for the alignment bytes */
131 blt .Lwm1_l4
132 .Lwm1_main:
133 .Lwm1loop:
134 ldr r3, [r1], #4 /* one word load feeds four byte writes */
135 strb r3, [r0] /* bits 7..0 */
136 mov r3, r3, lsr #8
137 strb r3, [r0] /* bits 15..8 */
138 mov r3, r3, lsr #8
139 strb r3, [r0] /* bits 23..16 */
140 mov r3, r3, lsr #8
141 strb r3, [r0] /* bits 31..24 */
142 subs r2, r2, #4
143 bge .Lwm1loop
144 .Lwm1_l4:
145 adds r2, r2, #4 /* r2 = length again */
146 ldmeqdb fp, {fp, sp, pc} /* nothing left, unwind and return */
147 cmp r2, #2 /* 1..3 trailing bytes remain */
148 ldrb r3, [r1], #1 /* always copy the first byte */
149 strb r3, [r0]
150 ldrgeb r3, [r1], #1 /* second byte when r2 >= 2 */
151 strgeb r3, [r0]
152 ldrgtb r3, [r1], #1 /* third byte when r2 == 3 */
153 strgtb r3, [r0]
154 ldmdb fp, {fp, sp, pc} /* unwind the frame and return */
155
156 /*
157 * Reads short ints (16 bits) from an I/O address into a block of memory
158 *
159 * r0 = address to read from (IO)
160 * r1 = address to write to (memory)
161 * r2 = length (number of 16 bit items)
162 */
163
164 ENTRY(insw)
165 /* Make sure that we have a positive length */
166 cmp r2, #0x00000000
167 movle pc, lr
168
169 /* If the count is even and the destination address is word aligned, do it fast */
170
171 tst r2, #0x00000001
172 tsteq r1, #0x00000003
173 beq .Lfastinsw
174
175 /* Non aligned insw */
176
177 .Linswloop:
178 ldr r3, [r0]
179 subs r2, r2, #0x00000001 /* Loop test in load delay slot */
180 strb r3, [r1], #0x0001 /* bits 7..0 of the word read */
181 mov r3, r3, lsr #8
182 strb r3, [r1], #0x0001 /* bits 15..8 of the word read */
183 bgt .Linswloop
184
185 RET
186
187 /* Word aligned insw */
188
189 .Lfastinsw:
190
191 .Lfastinswloop:
192 ldr r3, [r0, #0x0002] /* take advantage of nonaligned
193 * word accesses */
194 ldr ip, [r0] /* second read of the I/O address */
195 mov r3, r3, lsr #16 /* Put the two shorts together */
196 orr r3, r3, ip, lsl #16 /* low half of ip becomes the high half */
197 str r3, [r1], #0x0004 /* Store */
198 subs r2, r2, #0x00000002 /* Next */
199 bgt .Lfastinswloop
200
201 RET
202
203
204 /*
205 * Writes short ints (16 bits) from a block of memory to an I/O address
206 *
207 * r0 = address to write to (IO)
208 * r1 = address to read from (memory)
209 * r2 = length (number of 16 bit items)
210 */
211
212 ENTRY(outsw)
213 /* Make sure that we have a positive length */
214 cmp r2, #0x00000000
215 movle pc, lr
216
217 /* If the count is even and the source address is word aligned, do it fast */
218
219 tst r2, #0x00000001
220 tsteq r1, #0x00000003
221 beq .Lfastoutsw
222
223 /* Non aligned outsw */
224
225 .Loutswloop:
226 ldrb r3, [r1], #0x0001
227 ldrb ip, [r1], #0x0001
228 subs r2, r2, #0x00000001 /* Loop test in load delay slot */
229 orr r3, r3, ip, lsl #8 /* first byte in bits 7..0, second in 15..8 */
230 orr r3, r3, r3, lsl #16 /* same halfword in both halves of the word */
231 str r3, [r0]
232 bgt .Loutswloop
233
234 RET
235
236 /* Word aligned outsw */
237
238 .Lfastoutsw:
239
240 .Lfastoutswloop:
241 ldr r3, [r1], #0x0004 /* r3 = (H)(L) */
242 subs r2, r2, #0x00000002 /* Loop test in load delay slot */
243
244 eor ip, r3, r3, lsr #16 /* ip = (H)(H^L) */
245 eor r3, r3, ip, lsl #16 /* r3 = (H^H^L)(L) = (L)(L) */
246 eor ip, ip, r3, lsr #16 /* ip = (H)(H^L^L) = (H)(H) */
247
248 str r3, [r0] /* low halfword goes out first */
249 str ip, [r0] /* then the high halfword */
250
251 /* mov ip, r3, lsl #16
252 * orr ip, ip, ip, lsr #16
253 * str ip, [r0]
254 *
255 * mov ip, r3, lsr #16
256 * orr ip, ip, ip, lsl #16
257 * str ip, [r0]
258 */
259
260 bgt .Lfastoutswloop
261
262 RET
263
264 /*
265 * reads short ints (16 bits) from an I/O address into a block of memory
266 * with a length guaranteed to be a multiple of 16 bytes
267 * with a word aligned destination address (anything else is passed to insw)
268 *
269 * r0 = address to read from (IO)
270 * r1 = address to write to (memory)
271 * r2 = length (number of 16 bit items)
272 */
273
274 ENTRY(insw16)
275 /* Make sure that we have a positive length */
276 cmp r2, #0x00000000
277 movle pc, lr
278
279 /* If the destination address is word aligned and the size suitably
280 aligned, do it fast */
281
282 tst r2, #0x00000007
283 tsteq r1, #0x00000003
284
285 bne _C_LABEL(insw)
286
287 /* Word aligned insw */
288
289 stmfd sp!, {r4,r5,lr} /* r4, r5 and lr are used as scratch below */
290
291 .Linsw16loop:
292 ldr r3, [r0, #0x0002] /* take advantage of nonaligned
293 * word accesses */
294 ldr lr, [r0]
295 mov r3, r3, lsr #16 /* Put the two shorts together */
296 orr r3, r3, lr, lsl #16
297
298 ldr r4, [r0, #0x0002] /* take advantage of nonaligned
299 * word accesses */
300 ldr lr, [r0]
301 mov r4, r4, lsr #16 /* Put the two shorts together */
302 orr r4, r4, lr, lsl #16
303
304 ldr r5, [r0, #0x0002] /* take advantage of nonaligned
305 * word accesses */
306 ldr lr, [r0]
307 mov r5, r5, lsr #16 /* Put the two shorts together */
308 orr r5, r5, lr, lsl #16
309
310 ldr ip, [r0, #0x0002] /* take advantage of nonaligned
311 * word accesses */
312 ldr lr, [r0]
313 mov ip, ip, lsr #16 /* Put the two shorts together */
314 orr ip, ip, lr, lsl #16
315
316 stmia r1!, {r3-r5,ip} /* four words = eight 16 bit items */
317 subs r2, r2, #0x00000008 /* Next */
318 bgt .Linsw16loop
319
320 ldmfd sp!, {r4,r5,pc} /* Restore regs and go home */
321
322
323 /*
324 * Writes short ints (16 bits) from a block of memory to an I/O address
325 *
326 * r0 = address to write to (IO)
327 * r1 = address to read from (memory)
328 * r2 = length (number of 16 bit items)
329 */
330
331 ENTRY(outsw16)
332 /* Make sure that we have a positive length */
333 cmp r2, #0x00000000
334 movle pc, lr
335
336 /* If the source address is word aligned and the size suitably
337 aligned, do it fast, otherwise hand the transfer to outsw */
338
339 tst r2, #0x00000007
340 tsteq r1, #0x00000003
341
342 bne _C_LABEL(outsw)
343
344 /* Word aligned outsw */
345
346 stmfd sp!, {r4,r5,lr} /* r4, r5 and lr are used as scratch below */
347
348 .Loutsw16loop:
349 ldmia r1!, {r4,r5,ip,lr} /* four words = eight 16 bit items */
350
351 eor r3, r4, r4, lsl #16 /* r3 = (A^B)(B) */
352 eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
353 eor r3, r3, r4, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
354 str r3, [r0] /* low halfword goes out first */
355 str r4, [r0] /* then the high halfword */
356
357 /* mov r3, r4, lsl #16
358 * orr r3, r3, r3, lsr #16
359 * str r3, [r0]
360 *
361 * mov r3, r4, lsr #16
362 * orr r3, r3, r3, lsl #16
363 * str r3, [r0]
364 */
365
366 eor r3, r5, r5, lsl #16 /* r3 = (A^B)(B) */
367 eor r5, r5, r3, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
368 eor r3, r3, r5, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
369 str r3, [r0]
370 str r5, [r0]
371
372 eor r3, ip, ip, lsl #16 /* r3 = (A^B)(B) */
373 eor ip, ip, r3, lsr #16 /* ip = (A)(B^A^B) = (A)(A) */
374 eor r3, r3, ip, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
375 str r3, [r0]
376 str ip, [r0]
377
378 eor r3, lr, lr, lsl #16 /* r3 = (A^B)(B) */
379 eor lr, lr, r3, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
380 eor r3, r3, lr, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
381 str r3, [r0]
382 str lr, [r0]
383
384 subs r2, r2, #0x00000008
385 bgt .Loutsw16loop
386
387 ldmfd sp!, {r4,r5,pc} /* and go home */
388
389 /*
390 * reads short ints (16 bits) from an I/O address into a block of memory
391 * The I/O address is assumed to be mapped multiple times in a block of
392 * 8 words.
393 * The destination address should be word aligned.
394 *
395 * r0 = address to read from (IO)
396 * r1 = address to write to (memory)
397 * r2 = length (number of 16 bit items)
398 */
399
400 ENTRY(inswm8)
401 /* Make sure that we have a positive length */
402 cmp r2, #0x00000000
403 movle pc, lr
404
405 /* If the destination address is word aligned, do it fast,
406 otherwise hand the whole transfer to insw */
407
408 tst r1, #0x00000003
409
410 bne _C_LABEL(insw)
411
412 /* Word aligned insw */
413
414 stmfd sp!, {r4-r9,lr}
415
416 mov lr, #0xff000000
417 orr lr, lr, #0x00ff0000 /* lr = 0xffff0000, mask for the top halfword */
418
419 .Linswm8_loop8:
420 cmp r2, #8
421 bcc .Linswm8_l8 /* fewer than 8 items left */
422
423 ldmia r0, {r3-r9,ip} /* eight word reads, r0 .. r0+28 */
424
425 bic r3, r3, lr /* keep the low halfword of the 1st read */
426 orr r3, r3, r4, lsl #16 /* 2nd read supplies the high halfword */
427 bic r5, r5, lr
428 orr r4, r5, r6, lsl #16
429 bic r7, r7, lr
430 orr r5, r7, r8, lsl #16
431 bic r9, r9, lr
432 orr r6, r9, ip, lsl #16
433
434 stmia r1!, {r3-r6} /* four packed words = eight items */
435
436 subs r2, r2, #0x00000008 /* Next */
437 bne .Linswm8_loop8
438 beq .Linswm8_l1 /* count is exactly zero here */
439
440 .Linswm8_l8:
441 cmp r2, #4
442 bcc .Linswm8_l4 /* fewer than 4 items left */
443
444 ldmia r0, {r3-r6} /* four word reads */
445
446 bic r3, r3, lr
447 orr r3, r3, r4, lsl #16
448 bic r5, r5, lr
449 orr r4, r5, r6, lsl #16
450
451 stmia r1!, {r3-r4}
452
453 subs r2, r2, #0x00000004
454 beq .Linswm8_l1
455
456 .Linswm8_l4:
457 cmp r2, #2
458 bcc .Linswm8_l2 /* fewer than 2 items left */
459
460 ldmia r0, {r3-r4} /* two word reads */
461
462 bic r3, r3, lr
463 orr r3, r3, r4, lsl #16
464 str r3, [r1], #0x0004
465
466 subs r2, r2, #0x00000002
467 beq .Linswm8_l1
468
469 .Linswm8_l2:
470 cmp r2, #1
471 bcc .Linswm8_l1
472
473 ldr r3, [r0]
474 subs r2, r2, #0x00000001 /* Test in load delay slot */
475 /* XXX, why don't we use result? */
476
477 strb r3, [r1], #0x0001 /* bits 7..0 of the word read */
478 mov r3, r3, lsr #8
479 strb r3, [r1], #0x0001 /* bits 15..8 of the word read */
480
481
482 .Linswm8_l1:
483 ldmfd sp!, {r4-r9,pc} /* And go home */
484
485 /*
486 * write short ints (16 bits) to an I/O address from a block of memory
487 * The I/O address is assumed to be mapped multiple times in a block of
488 * 8 words.
489 * The source address should be word aligned.
490 *
491 * r0 = address to write to (IO)
492 * r1 = address to read from (memory)
493 * r2 = length (number of 16 bit items)
494 */
495
496 ENTRY(outswm8)
497 /* Make sure that we have a positive length */
498 cmp r2, #0x00000000
499 movle pc, lr
500
501 /* If the source address is word aligned, do it fast,
502 otherwise hand the whole transfer to outsw */
503
504 tst r1, #0x00000003
505
506 bne _C_LABEL(outsw)
507
508 /* Word aligned outsw */
509
510 stmfd sp!, {r4-r8,lr}
511
512 .Loutswm8_loop8:
513 cmp r2, #8
514 bcc .Loutswm8_l8 /* fewer than 8 items left */
515
516 ldmia r1!, {r3,r5,r7,ip} /* four words = eight 16 bit items */
517
518 eor r4, r3, r3, lsr #16 /* r4 = (A)(A^B) */
519 eor r3, r3, r4, lsl #16 /* r3 = (A^A^B)(B) = (B)(B) */
520 eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
521
522 eor r6, r5, r5, lsr #16 /* r6 = (A)(A^B) */
523 eor r5, r5, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
524 eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
525
526 eor r8, r7, r7, lsr #16 /* r8 = (A)(A^B) */
527 eor r7, r7, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
528 eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
529
530 eor lr, ip, ip, lsr #16 /* lr = (A)(A^B) */
531 eor ip, ip, lr, lsl #16 /* ip = (A^A^B)(B) = (B)(B) */
532 eor lr, lr, ip, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
533
534 stmia r0, {r3-r8,ip,lr} /* eight word writes, r0 .. r0+28 */
535
536 subs r2, r2, #0x00000008 /* Next */
537 bne .Loutswm8_loop8
538 beq .Loutswm8_l1 /* count is exactly zero here */
539
540 .Loutswm8_l8:
541 cmp r2, #4
542 bcc .Loutswm8_l4 /* fewer than 4 items left */
543
544 ldmia r1!, {r3-r4} /* two words = four 16 bit items */
545
546 eor r6, r3, r3, lsr #16 /* r6 = (A)(A^B) */
547 eor r5, r3, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
548 eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
549
550 eor r8, r4, r4, lsr #16 /* r8 = (A)(A^B) */
551 eor r7, r4, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
552 eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
553
554 stmia r0, {r5-r8} /* four word writes */
555
556 subs r2, r2, #0x00000004
557 beq .Loutswm8_l1
558
559 .Loutswm8_l4:
560 cmp r2, #2
561 bcc .Loutswm8_l2 /* fewer than 2 items left */
562
563 ldr r3, [r1], #0x0004 /* r3 = (A)(B) */
564 subs r2, r2, #0x00000002 /* Done test in Load delay slot */
565
566 eor r5, r3, r3, lsr #16 /* r5 = (A)(A^B)*/
567 eor r4, r3, r5, lsl #16 /* r4 = (A^A^B)(B) = (B)(B) */
568 eor r5, r5, r4, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
569
570 stmia r0, {r4, r5} /* two word writes */
571
572 beq .Loutswm8_l1 /* flags still come from the subs above */
573
574 .Loutswm8_l2:
575 cmp r2, #1
576 bcc .Loutswm8_l1
577
578 ldrb r3, [r1], #0x0001
579 ldrb r4, [r1], #0x0001
580 subs r2, r2, #0x00000001 /* Done test in load delay slot */
581 /* XXX This test isn't used? */
582 orr r3, r3, r4, lsl #8 /* first byte in bits 7..0, second in 15..8 */
583 orr r3, r3, r3, lsl #16 /* same halfword in both halves of the word */
584 str r3, [r0]
585
586 .Loutswm8_l1:
587 ldmfd sp!, {r4-r8,pc} /* And go home */
Cache object: db6bf9c186f1b749866a23aeef236bda
|