1 /* $NetBSD: bcopy_page.S,v 1.7 2003/10/13 21:03:13 scw Exp $ */
2
3 /*-
4 * Copyright (c) 1995 Scott Stevens
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by Scott Stevens.
18 * 4. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * RiscBSD kernel project
33 *
34 * bcopy_page.S
35 *
36 * page optimised bcopy and bzero routines
37 *
38 * Created : 08/04/95
39 */
40
41 #include <machine/asm.h>
42 #include <machine/param.h>
43 __FBSDID("$FreeBSD: releng/6.2/sys/arm/arm/bcopy_page.S 139735 2005-01-05 21:58:49Z imp $");
44 #ifndef __XSCALE__
45
46 /* #define BIG_LOOPS */
47
48 /*
49 * bcopy_page(src, dest)
50 *
51 * Optimised copy page routine.
52 *
53 * On entry:
54 * r0 - src address
55 * r1 - dest address
56 *
57 * Requires:
58 * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
59 * otherwise.
60 */
61
/*
 * Size in bytes of one chunk.  This matches what the default COPY_CHUNK
 * below moves (eight 32-bit registers); the constant itself is not
 * referenced by the code visible in this file.
 */
#define	CHUNK_SIZE		32

/*
 * Prefetch hooks.  Both are defined empty here, so they expand to nothing
 * at every point of use.
 */
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

/*
 * COPY_CHUNK: load eight words from [r0] and store them to [r1], writing
 * the advanced address back to each pointer.  r3-r8, ip and lr are used
 * as the transfer registers.  Only defined if nobody supplied one earlier.
 */
#ifndef COPY_CHUNK
#define	BCOPY_PAGE_REGS		r3-r8,ip,lr
#define	COPY_CHUNK					\
	PREFETCH_NEXT_CHUNK ;				\
	ldmia	r0!, {BCOPY_PAGE_REGS} ;		\
	stmia	r1!, {BCOPY_PAGE_REGS}
#endif	/* ! COPY_CHUNK */

/*
 * SAVE_REGS pushes r4-r8 and lr; RESTORE_REGS pops r4-r8 and loads the
 * saved lr value straight into pc, which also performs the return.
 * Both are defined together, keyed on SAVE_REGS alone.
 */
#ifndef SAVE_REGS
#define	SAVE_REGS		stmfd	sp!, {r4-r8, lr}
#define	RESTORE_REGS		ldmfd	sp!, {r4-r8, pc}
#endif
78
/*
 * Number of passes through the copy loop needed to cover one page.
 * A pass issues 16 COPY_CHUNKs when BIG_LOOPS is defined and 4 otherwise
 * (512 or 128 bytes with the default 32-byte COPY_CHUNK).
 */
#ifdef BIG_LOOPS
#define	BCOPY_PAGE_PASSES	(PAGE_SIZE >> 9)
#else
#define	BCOPY_PAGE_PASSES	(PAGE_SIZE >> 7)
#endif

/*
 * bcopy_page: copy one page.
 *   r0 - source address (advanced by COPY_CHUNK)
 *   r1 - destination address (advanced by COPY_CHUNK)
 *   r2 - passes remaining
 */
ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS
	mov	r2, #BCOPY_PAGE_PASSES

.Lbcopy_page_loop:
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

#ifdef BIG_LOOPS
	/*
	 * There is little point making the loop any larger; unless we are
	 * running with the cache off, the load/store overheads will
	 * completely dominate this loop.
	 */
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
#endif
	subs	r2, r2, #1		/* one pass done; sets Z when none remain */
	bne	.Lbcopy_page_loop

	RESTORE_REGS			/* ...and return. */
117
118 /*
119 * bzero_page(dest)
120 *
121 * Optimised zero page routine.
122 *
123 * On entry:
124 * r0 - dest address
125 *
126 * Requires:
127 * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
128 * otherwise
129 */
130
/*
 * Number of passes through the store loop needed to cover one page:
 * a pass writes 16 x 32 bytes with BIG_LOOPS, 4 x 32 bytes without.
 */
#ifdef BIG_LOOPS
#define	BZERO_PAGE_PASSES	(PAGE_SIZE >> 9)
#else
#define	BZERO_PAGE_PASSES	(PAGE_SIZE >> 7)
#endif

/* Store the eight zeroed registers (32 bytes) at [r0] and advance r0. */
#define	ZERO_CHUNK		stmia	r0!, {r3-r8,ip,lr}

/*
 * bzero_page: fill one page with zeros.
 *   r0            - destination address (advanced by each store)
 *   r2            - passes remaining
 *   r3-r8, ip, lr - all hold zero; they are the data source for the stores
 */
ENTRY(bzero_page)
	stmfd	sp!, {r4-r8, lr}	/* r4-r8 and lr are overwritten below */
	mov	r2, #BZERO_PAGE_PASSES
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

.Lbzero_page_loop:
	ZERO_CHUNK
	ZERO_CHUNK
	ZERO_CHUNK
	ZERO_CHUNK

#ifdef BIG_LOOPS
	/*
	 * There is little point making the loop any larger; unless we are
	 * running with the cache off, the load/store overheads will
	 * completely dominate this loop.
	 */
	ZERO_CHUNK
	ZERO_CHUNK
	ZERO_CHUNK
	ZERO_CHUNK

	ZERO_CHUNK
	ZERO_CHUNK
	ZERO_CHUNK
	ZERO_CHUNK

	ZERO_CHUNK
	ZERO_CHUNK
	ZERO_CHUNK
	ZERO_CHUNK

#endif

	subs	r2, r2, #1		/* one pass done; sets Z when none remain */
	bne	.Lbzero_page_loop

	ldmfd	sp!, {r4-r8, pc}	/* restore r4-r8; saved lr -> pc returns */
178
179 #else /* __XSCALE__ */
180
181 /*
182 * XSCALE version of bcopy_page
183 */
/*
 * bcopy_page(src, dest) - XScale variant.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Register use:
 *   r2/r3, r4/r5 - two alternating register pairs; each pair is filled by
 *                  two ldr instructions and written out by one strd
 *   ip           - passes remaining; one pass copies 128 bytes
 *
 * Requires:
 *   PAGE_SIZE is a multiple of 128.
 *
 * The loads run one pair ahead of the stores: the first pair is loaded
 * before the loop and, on every pass but the last, the first pair of the
 * next pass is loaded (conditionally) at the bottom of the loop.  The
 * offsets in the comments are relative to the start of the current
 * 128-byte block.
 */
ENTRY(bcopy_page)
	pld	[r0]
	stmfd	sp!, {r4, r5}		/* r4/r5 are used as the second pair */
	mov	ip, #(PAGE_SIZE >> 7)	/* 128 bytes per pass */
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 (start of next block) */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, [r1], #0x08
	subs	ip, ip, #0x01		/* flags drive the ldrgt/bgt below */
	/* Load the next block's first pair only if another pass follows. */
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, [r1], #0x08		/* last store of this block */
	bgt	1b
	ldmfd	sp!, {r4, r5}
	RET
246
247 /*
248 * XSCALE version of bzero_page
249 */
/*
 * bzero_page(dest) - XScale variant.
 *
 * On entry:
 *   r0 - dest address
 *
 * Register use:
 *   r1    - bytes still to clear (starts at PAGE_SIZE)
 *   r2/r3 - both zero; the register pair written by each strd
 *
 * Each pass issues sixteen 8-byte stores (128 bytes), so PAGE_SIZE must
 * be a multiple of 128 for the count to reach exactly zero.
 */
ENTRY(bzero_page)
	mov	r2, #0
	mov	r3, #0
	mov	r1, #PAGE_SIZE

.Lbzero_page_loop:
	/* bytes 0x00-0x1f of this 128-byte block */
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	/* bytes 0x20-0x3f */
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	/* bytes 0x40-0x5f */
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	/* bytes 0x60-0x7f */
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8

	subs	r1, r1, #128		/* 128 bytes cleared this pass */
	bne	.Lbzero_page_loop
	RET
273 #endif /* __XSCALE__ */
Cache object: 6338f9c3edb985386b6df4a005a97a7a
|