FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/blockio.S
1 /* $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $ */
2
3 /*-
4 * Copyright (c) 2001 Ben Harris.
5 * Copyright (c) 1994 Mark Brinicombe.
6 * Copyright (c) 1994 Brini.
7 * All rights reserved.
8 *
9 * This code is derived from software written for Brini by Mark Brinicombe
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by Brini.
22 * 4. The name of the company nor the name of the author may be used to
23 * endorse or promote products derived from this software without specific
24 * prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
27 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
30 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * RiscBSD kernel project
39 *
40 * blockio.S
41 *
42 * optimised block read/write from/to IO routines.
43 *
44 * Created : 08/10/94
45 * Modified : 22/01/99 -- R.Earnshaw
46 * Faster, and small tweaks for StrongARM
47 */
48
49 #include <machine/asm.h>
50
51 __FBSDID("$FreeBSD: releng/10.2/sys/arm/arm/blockio.S 275767 2014-12-14 16:28:53Z andrew $");
52
53 .syntax unified
54
55 /*
56 * Read bytes from an I/O address into a block of memory
57 *
58 * r0 = address to read from (IO)
59 * r1 = address to write to (memory)
60 * r2 = length
61 */
62
63 /* This code will look very familiar if you've read _memcpy(). */
64 ENTRY(read_multi_1)
65 mov ip, sp /* ip = sp on entry; stored in the frame below */
66 stmfd sp!, {fp, ip, lr, pc} /* push 4-word frame: fp, entry sp, lr, pc */
67 sub fp, ip, #4 /* fp -> highest word of that frame */
68 subs r2, r2, #4 /* r2 = length - 4 */
69 blt .Lrm1_l4 /* less than 4 bytes */
70 ands r12, r1, #3 /* r12 = destination offset within its word */
71 beq .Lrm1_main /* aligned destination */
72 rsb r12, r12, #4 /* r12 = 1..3 bytes needed to reach alignment */
73 cmp r12, #2 /* lt: copy 1 byte, eq: 2 bytes, gt: 3 bytes */
74 ldrb r3, [r0]
75 strb r3, [r1], #1
76 ldrbge r3, [r0]
77 strbge r3, [r1], #1
78 ldrbgt r3, [r0]
79 strbgt r3, [r1], #1
80 subs r2, r2, r12 /* r2 = (remaining length) - 4 */
81 blt .Lrm1_l4 /* fewer than 4 bytes remain */
82 .Lrm1_main:
83 .Lrm1loop:
84 ldrb r3, [r0] /* 1st byte read -> bits 0-7 */
85 ldrb r12, [r0]
86 orr r3, r3, r12, lsl #8 /* 2nd byte read -> bits 8-15 */
87 ldrb r12, [r0]
88 orr r3, r3, r12, lsl #16 /* 3rd byte read -> bits 16-23 */
89 ldrb r12, [r0]
90 orr r3, r3, r12, lsl #24 /* 4th byte read -> bits 24-31 */
91 str r3, [r1], #4 /* single word store to the aligned destination */
92 subs r2, r2, #4
93 bge .Lrm1loop /* at least 4 more bytes to go */
94 .Lrm1_l4:
95 adds r2, r2, #4 /* r2 = remaining length (0..3) */
96 ldmdbeq fp, {fp, sp, pc} /* nothing left: unwind the frame; loading pc returns */
98 cmp r2, #2 /* lt: copy 1 byte, eq: 2 bytes, gt: 3 bytes */
99 ldrb r3, [r0]
100 strb r3, [r1], #1
101 ldrbge r3, [r0]
102 strbge r3, [r1], #1
103 ldrbgt r3, [r0]
104 strbgt r3, [r1], #1
105 ldmdb fp, {fp, sp, pc} /* restore fp and sp, return via saved lr */
106 END(read_multi_1)
107
108 /*
109 * Write bytes to an I/O address from a block of memory
110 *
111 * r0 = address to write to (IO)
112 * r1 = address to read from (memory)
113 * r2 = length
114 */
115
116 /* This code will look very familiar if you've read _memcpy(). */
117 ENTRY(write_multi_1)
118 mov ip, sp /* ip = sp on entry; stored in the frame below */
119 stmfd sp!, {fp, ip, lr, pc} /* push 4-word frame: fp, entry sp, lr, pc */
120 sub fp, ip, #4 /* fp -> highest word of that frame */
121 subs r2, r2, #4 /* r2 = length - 4 */
122 blt .Lwm1_l4 /* less than 4 bytes */
123 ands r12, r1, #3 /* r12 = source offset within its word */
124 beq .Lwm1_main /* aligned source */
125 rsb r12, r12, #4 /* r12 = 1..3 bytes needed to reach alignment */
126 cmp r12, #2 /* lt: copy 1 byte, eq: 2 bytes, gt: 3 bytes */
127 ldrb r3, [r1], #1
128 strb r3, [r0]
129 ldrbge r3, [r1], #1
130 strbge r3, [r0]
131 ldrbgt r3, [r1], #1
132 strbgt r3, [r0]
133 subs r2, r2, r12 /* r2 = (remaining length) - 4 */
134 blt .Lwm1_l4 /* fewer than 4 bytes remain */
135 .Lwm1_main:
136 .Lwm1loop:
137 ldr r3, [r1], #4 /* one aligned word load from memory */
138 strb r3, [r0] /* write bits 0-7 first */
139 mov r3, r3, lsr #8
140 strb r3, [r0] /* then bits 8-15 */
141 mov r3, r3, lsr #8
142 strb r3, [r0] /* then bits 16-23 */
143 mov r3, r3, lsr #8
144 strb r3, [r0] /* then bits 24-31 */
145 subs r2, r2, #4
146 bge .Lwm1loop /* at least 4 more bytes to go */
147 .Lwm1_l4:
148 adds r2, r2, #4 /* r2 = remaining length (0..3) */
149 ldmdbeq fp, {fp, sp, pc} /* nothing left: unwind the frame; loading pc returns */
150 cmp r2, #2 /* lt: copy 1 byte, eq: 2 bytes, gt: 3 bytes */
151 ldrb r3, [r1], #1
152 strb r3, [r0]
153 ldrbge r3, [r1], #1
154 strbge r3, [r0]
155 ldrbgt r3, [r1], #1
156 strbgt r3, [r0]
157 ldmdb fp, {fp, sp, pc} /* restore fp and sp, return via saved lr */
158 END(write_multi_1)
159
160 /*
161 * Reads short ints (16 bits) from an I/O address into a block of memory
162 *
163 * r0 = address to read from (IO)
164 * r1 = address to write to (memory)
165 * r2 = length
166 */
167
168 ENTRY(insw)
169 /* Make sure that we have a positive length */
170 cmp r2, #0x00000000
171 movle pc, lr /* return at once if r2 <= 0 */
172
173 /* If the destination address is word aligned and the count is even, do it fast */
174
175 tst r2, #0x00000001 /* Z set when the count in r2 is even */
176 tsteq r1, #0x00000003 /* ... then Z stays set only if r1 is word aligned */
177 beq .Lfastinsw
178
179 /* Non aligned insw: one 16-bit item per iteration, stored as two bytes */
180
181 .Linswloop:
182 ldr r3, [r0] /* word read from the I/O address; only bits 0-15 are stored */
183 subs r2, r2, #0x00000001 /* Loop test in load delay slot */
184 strb r3, [r1], #0x0001 /* bits 0-7 */
185 mov r3, r3, lsr #8
186 strb r3, [r1], #0x0001 /* bits 8-15 */
187 bgt .Linswloop /* flags are still those of the subs above */
188
189 RET
190
191 /* Word aligned insw: two 16-bit items per iteration, stored as one word */
192
193 .Lfastinsw:
194
195 .Lfastinswloop:
196 ldr r3, [r0, #0x0002] /* take advantage of nonaligned
197 * word accesses
 * NOTE(review): depends on how the platform
 * handles a word load at r0+2 - confirm */
198 ldr ip, [r0]
199 mov r3, r3, lsr #16 /* Put the two shorts together */
200 orr r3, r3, ip, lsl #16 /* bits 0-15 of the 2nd load become bits 16-31 */
201 str r3, [r1], #0x0004 /* Store */
202 subs r2, r2, #0x00000002 /* Next */
203 bgt .Lfastinswloop
204
205 RET
206 END(insw)
207
208 /*
209 * Writes short ints (16 bits) from a block of memory to an I/O address
210 *
211 * r0 = address to write to (IO)
212 * r1 = address to read from (memory)
213 * r2 = length
214 */
215
216 ENTRY(outsw)
217 /* Make sure that we have a positive length */
218 cmp r2, #0x00000000
219 movle pc, lr /* return at once if r2 <= 0 */
220
221 /* If the source address is word aligned and the count is even, do it fast */
222
223 tst r2, #0x00000001 /* Z set when the count in r2 is even */
224 tsteq r1, #0x00000003 /* ... then Z stays set only if r1 is word aligned */
225 beq .Lfastoutsw
226
227 /* Non aligned outsw: one 16-bit item per iteration, loaded as two bytes */
228
229 .Loutswloop:
230 ldrb r3, [r1], #0x0001 /* first byte -> bits 0-7 */
231 ldrb ip, [r1], #0x0001 /* second byte -> bits 8-15 (via the orr below) */
232 subs r2, r2, #0x00000001 /* Loop test in load delay slot */
233 orr r3, r3, ip, lsl #8
234 orr r3, r3, r3, lsl #16 /* duplicate the halfword into bits 16-31 */
235 str r3, [r0] /* word write to the I/O address */
236 bgt .Loutswloop /* flags are still those of the subs above */
237
238 RET
239
240 /* Word aligned outsw: two 16-bit items per iteration, loaded as one word */
241
242 .Lfastoutsw:
243
244 .Lfastoutswloop:
245 ldr r3, [r1], #0x0004 /* r3 = (H)(L) */
246 subs r2, r2, #0x00000002 /* Loop test in load delay slot */
247
248 eor ip, r3, r3, lsr #16 /* ip = (H)(H^L) */
249 eor r3, r3, ip, lsl #16 /* r3 = (H^H^L)(L) = (L)(L) */
250 eor ip, ip, r3, lsr #16 /* ip = (H)(H^L^L) = (H)(H) */
251
252 str r3, [r0] /* low halfword goes out first ... */
253 str ip, [r0] /* ... then the high halfword */
254
255 /* mov ip, r3, lsl #16
256 * orr ip, ip, ip, lsr #16
257 * str ip, [r0]
258 *
259 * mov ip, r3, lsr #16
260 * orr ip, ip, ip, lsl #16
261 * str ip, [r0]
262 */
263
264 bgt .Lfastoutswloop /* flags are still those of the subs above */
265
266 RET
267 END(outsw)
268
269 /*
270 * reads short ints (16 bits) from an I/O address into a block of memory
271 * with a length guaranteed to be a multiple of 16 bytes
272 * with a word aligned destination address
273 *
274 * r0 = address to read from (IO)
275 * r1 = address to write to (memory)
276 * r2 = length
277 */
278
279 ENTRY(insw16)
280 /* Make sure that we have a positive length */
281 cmp r2, #0x00000000
282 movle pc, lr /* return at once if r2 <= 0 */
283
284 /* If the destination address is word aligned and the size suitably
285 aligned, do it fast */
286
287 tst r2, #0x00000007 /* Z set when the count is a multiple of 8 */
288 tsteq r1, #0x00000003 /* ... then Z stays set only if r1 is word aligned */
289
290 bne _C_LABEL(insw) /* otherwise let insw do it, r0-r2 untouched */
291
292 /* Word aligned insw: eight 16-bit items (four words) per iteration */
293
294 stmfd sp!, {r4,r5,lr} /* r4, r5 and lr are used as scratch below */
295
296 .Linsw16loop:
297 ldr r3, [r0, #0x0002] /* take advantage of nonaligned
298 * word accesses
 * NOTE(review): depends on how the platform
 * handles a word load at r0+2 - confirm */
299 ldr lr, [r0]
300 mov r3, r3, lsr #16 /* Put the two shorts together */
301 orr r3, r3, lr, lsl #16 /* r3 = 1st output word */
302
303 ldr r4, [r0, #0x0002] /* take advantage of nonaligned
304 * word accesses */
305 ldr lr, [r0]
306 mov r4, r4, lsr #16 /* Put the two shorts together */
307 orr r4, r4, lr, lsl #16 /* r4 = 2nd output word */
308
309 ldr r5, [r0, #0x0002] /* take advantage of nonaligned
310 * word accesses */
311 ldr lr, [r0]
312 mov r5, r5, lsr #16 /* Put the two shorts together */
313 orr r5, r5, lr, lsl #16 /* r5 = 3rd output word */
314
315 ldr ip, [r0, #0x0002] /* take advantage of nonaligned
316 * word accesses */
317 ldr lr, [r0]
318 mov ip, ip, lsr #16 /* Put the two shorts together */
319 orr ip, ip, lr, lsl #16 /* ip = 4th output word */
320
321 stmia r1!, {r3-r5,ip} /* store the four words, advance r1 by 16 */
322 subs r2, r2, #0x00000008 /* Next */
323 bgt .Linsw16loop
324
325 ldmfd sp!, {r4,r5,pc} /* Restore regs and go home */
326 END(insw16)
327
328 /*
329 * Writes short ints (16 bits) from a block of memory to an I/O address
330 *
331 * r0 = address to write to (IO)
332 * r1 = address to read from (memory)
333 * r2 = length
334 */
335
336 ENTRY(outsw16)
337 /* Make sure that we have a positive length */
338 cmp r2, #0x00000000
339 movle pc, lr /* return at once if r2 <= 0 */
340
341 /* If the source address is word aligned and the size suitably
342 aligned, do it fast */
343
344 tst r2, #0x00000007 /* Z set when the count is a multiple of 8 */
345 tsteq r1, #0x00000003 /* ... then Z stays set only if r1 is word aligned */
346
347 bne _C_LABEL(outsw) /* otherwise let outsw do it, r0-r2 untouched */
348
349 /* Word aligned outsw: eight 16-bit items (four words) per iteration */
350
351 stmfd sp!, {r4,r5,lr} /* r4, r5 and lr are used as scratch below */
352
353 .Loutsw16loop:
354 ldmia r1!, {r4,r5,ip,lr} /* load four words, advance r1 by 16 */
355
356 eor r3, r4, r4, lsl #16 /* r3 = (A^B)(B) */
357 eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
358 eor r3, r3, r4, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
359 str r3, [r0] /* low halfword (B) goes out first ... */
360 str r4, [r0] /* ... then the high halfword (A) */
361
362 /* mov r3, r4, lsl #16
363 * orr r3, r3, r3, lsr #16
364 * str r3, [r0]
365 *
366 * mov r3, r4, lsr #16
367 * orr r3, r3, r3, lsl #16
368 * str r3, [r0]
369 */
370
371 eor r3, r5, r5, lsl #16 /* r3 = (A^B)(B) */
372 eor r5, r5, r3, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
373 eor r3, r3, r5, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
374 str r3, [r0]
375 str r5, [r0]
376
377 eor r3, ip, ip, lsl #16 /* r3 = (A^B)(B) */
378 eor ip, ip, r3, lsr #16 /* ip = (A)(B^A^B) = (A)(A) */
379 eor r3, r3, ip, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
380 str r3, [r0]
381 str ip, [r0]
382
383 eor r3, lr, lr, lsl #16 /* r3 = (A^B)(B) */
384 eor lr, lr, r3, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
385 eor r3, r3, lr, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
386 str r3, [r0]
387 str lr, [r0]
388
389 subs r2, r2, #0x00000008 /* eight items consumed */
390 bgt .Loutsw16loop
391
392 ldmfd sp!, {r4,r5,pc} /* and go home */
393 END(outsw16)
394
395 /*
396 * reads short ints (16 bits) from an I/O address into a block of memory
397 * The I/O address is assumed to be mapped multiple times in a block of
398 * 8 words.
399 * The destination address should be word aligned.
400 *
401 * r0 = address to read from (IO)
402 * r1 = address to write to (memory)
403 * r2 = length
404 */
405
406 ENTRY(inswm8)
407 /* Make sure that we have a positive length */
408 cmp r2, #0x00000000
409 movle pc, lr /* return at once if r2 <= 0 */
410
411 /* Only the destination alignment is tested here: an unaligned
412 destination is handed to insw with r0-r2 untouched */
413
414 tst r1, #0x00000003
415
416 bne _C_LABEL(insw)
417
418 /* Word aligned insw */
419
420 stmfd sp!, {r4-r9,lr} /* r4-r9 and lr are used as scratch below */
421
422 mov lr, #0xff000000
423 orr lr, lr, #0x00ff0000 /* lr = 0xffff0000, mask for bits 16-31 */
424
425 .Linswm8_loop8:
426 cmp r2, #8
427 bcc .Linswm8_l8 /* fewer than 8 items left */
428
429 ldmia r0, {r3-r9,ip} /* eight word loads from r0 .. r0+28 */
430
431 bic r3, r3, lr /* keep bits 0-15 of the 1st word */
432 orr r3, r3, r4, lsl #16 /* bits 0-15 of the 2nd word on top */
433 bic r5, r5, lr
434 orr r4, r5, r6, lsl #16 /* r4 = 3rd and 4th items */
435 bic r7, r7, lr
436 orr r5, r7, r8, lsl #16 /* r5 = 5th and 6th items */
437 bic r9, r9, lr
438 orr r6, r9, ip, lsl #16 /* r6 = 7th and 8th items */
439
440 stmia r1!, {r3-r6} /* store four packed words, advance r1 by 16 */
441
442 subs r2, r2, #0x00000008 /* Next */
443 bne .Linswm8_loop8
444 beq .Linswm8_l1 /* always taken here: count reached zero */
445
446 .Linswm8_l8:
447 cmp r2, #4
448 bcc .Linswm8_l4 /* fewer than 4 items left */
449
450 ldmia r0, {r3-r6} /* four word loads from r0 .. r0+12 */
451
452 bic r3, r3, lr
453 orr r3, r3, r4, lsl #16
454 bic r5, r5, lr
455 orr r4, r5, r6, lsl #16
456
457 stmia r1!, {r3-r4} /* store two packed words */
458
459 subs r2, r2, #0x00000004
460 beq .Linswm8_l1
461
462 .Linswm8_l4:
463 cmp r2, #2
464 bcc .Linswm8_l2 /* fewer than 2 items left */
465
466 ldmia r0, {r3-r4} /* two word loads from r0 and r0+4 */
467
468 bic r3, r3, lr
469 orr r3, r3, r4, lsl #16
470 str r3, [r1], #0x0004 /* store one packed word */
471
472 subs r2, r2, #0x00000002
473 beq .Linswm8_l1
474
475 .Linswm8_l2:
476 cmp r2, #1
477 bcc .Linswm8_l1 /* nothing left */
478
479 ldr r3, [r0] /* last item: bits 0-15 stored as two bytes */
480 subs r2, r2, #0x00000001 /* Test in load delay slot */
481 /* XXX, why don't we use result? */
482
483 strb r3, [r1], #0x0001 /* bits 0-7 */
484 mov r3, r3, lsr #8
485 strb r3, [r1], #0x0001 /* bits 8-15 */
486
487
488 .Linswm8_l1:
489 ldmfd sp!, {r4-r9,pc} /* And go home */
490 END(inswm8)
491
492 /*
493 * write short ints (16 bits) to an I/O address from a block of memory
494 * The I/O address is assumed to be mapped multiple times in a block of
495 * 8 words.
496 * The source address should be word aligned.
497 *
498 * r0 = address to write to (IO)
499 * r1 = address to read from (memory)
500 * r2 = length
501 */
502
503 ENTRY(outswm8)
504 /* Make sure that we have a positive length */
505 cmp r2, #0x00000000
506 movle pc, lr /* return at once if r2 <= 0 */
507
508 /* Only the source alignment is tested here: an unaligned
509 source is handed to outsw with r0-r2 untouched */
510
511 tst r1, #0x00000003
512
513 bne _C_LABEL(outsw)
514
515 /* Word aligned outsw */
516
517 stmfd sp!, {r4-r8,lr} /* r4-r8 and lr are used as scratch below */
518
519 .Loutswm8_loop8:
520 cmp r2, #8
521 bcc .Loutswm8_l8 /* fewer than 8 items left */
522
523 ldmia r1!, {r3,r5,r7,ip} /* load four words, advance r1 by 16 */
524
525 eor r4, r3, r3, lsr #16 /* r4 = (A)(A^B) */
526 eor r3, r3, r4, lsl #16 /* r3 = (A^A^B)(B) = (B)(B) */
527 eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
528
529 eor r6, r5, r5, lsr #16 /* r6 = (A)(A^B) */
530 eor r5, r5, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
531 eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
532
533 eor r8, r7, r7, lsr #16 /* r8 = (A)(A^B) */
534 eor r7, r7, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
535 eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
536
537 eor lr, ip, ip, lsr #16 /* lr = (A)(A^B) */
538 eor ip, ip, lr, lsl #16 /* ip = (A^A^B)(B) = (B)(B) */
539 eor lr, lr, ip, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
540
541 stmia r0, {r3-r8,ip,lr} /* eight word stores to r0 .. r0+28 */
542
543 subs r2, r2, #0x00000008 /* Next */
544 bne .Loutswm8_loop8
545 beq .Loutswm8_l1 /* always taken here: count reached zero */
546
547 .Loutswm8_l8:
548 cmp r2, #4
549 bcc .Loutswm8_l4 /* fewer than 4 items left */
550
551 ldmia r1!, {r3-r4} /* load two words, advance r1 by 8 */
552
553 eor r6, r3, r3, lsr #16 /* r6 = (A)(A^B) */
554 eor r5, r3, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
555 eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
556
557 eor r8, r4, r4, lsr #16 /* r8 = (A)(A^B) */
558 eor r7, r4, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
559 eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
560
561 stmia r0, {r5-r8} /* four word stores to r0 .. r0+12 */
562
563 subs r2, r2, #0x00000004
564 beq .Loutswm8_l1
565
566 .Loutswm8_l4:
567 cmp r2, #2
568 bcc .Loutswm8_l2 /* fewer than 2 items left */
569
570 ldr r3, [r1], #0x0004 /* r3 = (A)(B) */
571 subs r2, r2, #0x00000002 /* Done test in Load delay slot */
572
573 eor r5, r3, r3, lsr #16 /* r5 = (A)(A^B)*/
574 eor r4, r3, r5, lsl #16 /* r4 = (A^A^B)(B) = (B)(B) */
575 eor r5, r5, r4, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
576
577 stmia r0, {r4, r5} /* two word stores to r0 and r0+4 */
578
579 beq .Loutswm8_l1 /* flags are still those of the subs above */
580
581 .Loutswm8_l2:
582 cmp r2, #1
583 bcc .Loutswm8_l1 /* nothing left */
584
585 ldrb r3, [r1], #0x0001 /* last item, loaded as two bytes */
586 ldrb r4, [r1], #0x0001
587 subs r2, r2, #0x00000001 /* Done test in load delay slot */
588 /* XXX This test isn't used? */
589 orr r3, r3, r4, lsl #8 /* second byte -> bits 8-15 */
590 orr r3, r3, r3, lsl #16 /* duplicate the halfword into bits 16-31 */
591 str r3, [r0]
592
593 .Loutswm8_l1:
594 ldmfd sp!, {r4-r8,pc} /* And go home */
595 END(outswm8)
596
Cache object: e5c0726ec8a2a052c6fe7da34f4ec468
|