FreeBSD/Linux Kernel Cross Reference
sys/mips/mips/bcopy.S
1 /* $NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $ */
2
3 /*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29 /*
30 * File: mips_bcopy.s
31 * Author: Chris Maeda
32 * Date: June 1993
33 *
34 * Fast copy routine. Derived from aligned_block_copy.
35 */
36
37
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
40
41 #include <machine/endian.h>
42
43 #if defined(LIBC_SCCS) && !defined(lint)
44 #if 0
45 ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
46 #else
47 ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
48 #endif
49 #endif /* LIBC_SCCS and not lint */
50
51 #ifdef __ABICALLS__
52 .abicalls
53 #endif
54
/*
 * Overlap-safe block copy with four entry points sharing one body.
 *
 *	bcopy, ovbcopy:		a0 = src address, a1 = dst address, a2 = length
 *	memcpy, memmove:	a0 = dst address, a1 = src address, a2 = length
 *
 * The memcpy/memmove entry swaps a0 and a1 into bcopy order and leaves
 * the original a0 (dst) in v0; nothing after that writes v0.  The
 * bcopy/ovbcopy entry never touches v0.
 *
 * Direction is chosen so that overlapping regions are handled: when
 * src < dst (unsigned) the copy runs from the top of the buffers down,
 * otherwise from the bottom up.
 *
 * Registers written: a0-a3, t0-t3, v1, AT (plus v0 on the memcpy entry).
 * The routine runs under ".set noreorder", so the instruction following
 * every branch is its delay slot and executes whether or not the branch
 * is taken.
 */

#define	SRCREG	a0
#define	DSTREG	a1
#define	SIZEREG	a2

LEAF(memcpy)
XLEAF(memmove)
	.set	noat
	.set	noreorder

	/*
	 * Swap (dst, src) into (src, dst); v0 ends up holding dst.
	 */
	move	v0, a0
	move	a0, a1
	move	a1, v0

XLEAF(bcopy)
XLEAF(ovbcopy)
	/*
	 * Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG		# t0 = (src < dst), unsigned
	bne	t0,zero,6f			# src below dst: copy backwards

	/*
	 * Alignment cases (with frequency, based on measurements taken
	 * with a DECstation 5000/200 inside a Mach kernel):
	 *
	 *	aligned		  -> aligned	(mostly)
	 *	unaligned	  -> aligned	(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 * An unaligned destination is copied entirely by the byte loop.
	 * Another case could be added for source and destination that
	 * are unaligned by the same amount (e.g. both on a halfword
	 * boundary), since such a copy is alignable.
	 *
	 * The andi below is the delay slot of the bne above; it only
	 * writes t1, which the backward path recomputes.
	 */
	andi		t1,DSTREG,(SZREG-1)	# low bits of dst
	bne		t1,zero,3f		# dst unaligned: byte copy
	andi		t0,SRCREG,(SZREG-1)	# (delay slot) low bits of src
	bne		t0,zero,5f		# src unaligned, dst aligned

	/*
	 * Forward aligned->aligned copy, 8*SZREG bytes per iteration.
	 * The li is also the delay slot of the bne above; it only
	 * writes AT.  Label 98 is not referenced within this routine.
	 */
98:
	li		AT,-(SZREG*8)
	and		t0,SIZEREG,AT		# t0 = len rounded down to 8*SZREG
	PTR_ADDU	a3,SRCREG,t0		# a3 = src address ending fast loop
	sltu		AT,SRCREG,a3		# any full chunk to copy?
	beq		AT,zero,2f
	PTR_SUBU	SIZEREG,t0		# (delay slot) len -= fast-loop bytes

	/*
	 * Unrolled loop: four loads, advance src, four stores, then the
	 * second group of four addressed with negative offsets from the
	 * already advanced pointers.
	 */
1:	# cp
	REG_L		t3,(0*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	PTR_ADDU	SRCREG,SZREG*8
	REG_S		t3,(0*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t3,(-4*SZREG)(SRCREG)
	PTR_ADDU	DSTREG,SZREG*8
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(-4*SZREG)(DSTREG)	# (delay slot) last store

	/*
	 * Copy one SZREG-sized word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,(SZREG-1)	# t2 = len mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = bytes held in whole words
	beq		t2,zero,3f
	PTR_ADDU	t0,SRCREG,t2		# (delay slot) t0 = stop address
	PTR_SUBU	SIZEREG,SIZEREG,t2	# len = trailing bytes only
1:
	REG_L		t3,0(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,t0,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	/*
	 * Copy the remaining SIZEREG bytes one at a time.  The guard
	 * guarantees a non-zero count on loop entry, so the count-down
	 * reaches exactly zero and can be tested with bne.  A signed
	 * test (bgtz) would stop early if the count had its top bit set.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,0(SRCREG)
	PTR_ADDU	SRCREG,1
	sb		t3,0(DSTREG)
	PTR_SUBU	SIZEREG,1
	bne		SIZEREG,zero,1b
	PTR_ADDU	DSTREG,1		# (delay slot)

4:	# copydone
	j		ra
	nop

	/*
	 * Forward copy from unaligned source to aligned dest.
	 * REG_LHI/REG_LLO come from machine/asm.h; presumably the
	 * endian-appropriate unaligned load pair -- confirm there.
	 */
5:	# destaligned
	andi		t0,SIZEREG,(SZREG-1)	# t0 = len mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# a3 = bytes held in whole words
	beq		a3,zero,3b		# less than one word: byte copy
	nop
	move		SIZEREG,t0		# bytes left for the byte loop
	PTR_ADDU	a3,SRCREG,a3		# a3 = stop address

	/*
	 * Pointer updates use PTR_ADDU like the rest of the routine.
	 * NOTE(review): the PTR_ADDI used here previously is presumably
	 * the overflow-trapping add form -- confirm in machine/asm.h.
	 */
1:
	REG_LHI		t3,0(SRCREG)
	REG_LLO		t3,SZREG-1(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,a3,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish trailing bytes
	nop

	/*
	 * Backward copy, mirroring the forward code above.  Both
	 * pointers are first moved one past the end of their buffers
	 * and every access uses offsets below the pointer.
	 */
6:	# backcopy
	PTR_ADDU	SRCREG,SIZEREG
	PTR_ADDU	DSTREG,SIZEREG
	andi		t1,DSTREG,SZREG-1	# low bits of dst end
	bne		t1,zero,3f		# dst unaligned: byte copy
	andi		t0,SRCREG,SZREG-1	# (delay slot) low bits of src end
	bne		t0,zero,5f		# src unaligned, dst aligned

	/*
	 * Backward aligned->aligned copy, 8*SZREG bytes per iteration.
	 * The li is also the delay slot of the bne above; it only
	 * writes AT.
	 */
	li		AT,(-8*SZREG)
	and		t0,SIZEREG,AT		# t0 = len rounded down to 8*SZREG
	beq		t0,zero,2f		# any full chunk to copy?
	PTR_SUBU	SIZEREG,t0		# (delay slot) len -= fast-loop bytes
	PTR_SUBU	a3,SRCREG,t0		# a3 = src address ending fast loop

	/*
	 * Unrolled loop: the top four words first, then the four below
	 * them addressed from the already lowered pointers.
	 */
1:	# cp
	REG_L		t3,(-4*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	PTR_SUBU	SRCREG,8*SZREG
	REG_S		t3,(-4*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t3,(0*SZREG)(SRCREG)
	PTR_SUBU	DSTREG,8*SZREG
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(0*SZREG)(DSTREG)	# (delay slot) last store

	/*
	 * Copy one SZREG-sized word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,SZREG-1	# t2 = len mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = bytes held in whole words
	beq		t2,zero,3f
	PTR_SUBU	t0,SRCREG,t2		# (delay slot) t0 = stop address
	PTR_SUBU	SIZEREG,SIZEREG,t2	# len = trailing bytes only
1:
	REG_L		t3,-SZREG(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,t0,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	/*
	 * Copy the remaining SIZEREG bytes one at a time, downwards.
	 * As in the forward byte loop, the count is non-zero on entry
	 * and is tested with bne rather than a signed compare.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,1
	sb		t3,-1(DSTREG)
	PTR_SUBU	SIZEREG,1
	bne		SIZEREG,zero,1b
	PTR_SUBU	DSTREG,1		# (delay slot)

4:	# copydone
	j		ra
	nop

	/*
	 * Backward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi		t0,SIZEREG,SZREG-1	# t0 = len mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# a3 = bytes held in whole words
	beq		a3,zero,3b		# less than one word: byte copy
	nop
	move		SIZEREG,t0		# bytes left for the byte loop
	PTR_SUBU	a3,SRCREG,a3		# a3 = stop address

1:
	REG_LHI		t3,-SZREG(SRCREG)
	REG_LLO		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,a3,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish trailing bytes
	nop

	.set	reorder
	.set	at
END(memcpy)
Cache object: 64af6f4378f6fcda4e050a1e7e701263
|