#include <machine/asm.h>

/*
 * Register usage on entry to xlr_csum_partial_nocopy:
 *
 *   a0: source address
 *   a1: length of the area to checksum
 *   a2: partial checksum
 *   a3: dst (given a name below, but not referenced by the code in this file)
 *
 * The running sum is accumulated in v0 and handed back to the caller there.
 */

#define src a0
#define dst a3
#define sum v0

	.text
	.set	noreorder	/* branch delay slots below are filled by hand */

/*
 * CSUM_BIGCHUNK_AND_COPY offset
 *
 * Add the 32 bytes at a0 + offset to the running sum in v0, as four
 * doubleword loads.  Despite the name, the body stores nothing: it only
 * prefetches, loads and accumulates.
 *
 * In:       a0 = chunk base (ld needs it doubleword aligned)
 * In/out:   v0 = running sum
 * Clobbers: t0, t1
 *
 * daddwc is emitted as raw .word values; the mnemonics in the comments are
 * the only description of it available here (presumably an add that chains
 * a carry between successive uses -- confirm against the XLR manual).
 *
 * NOTE(review): the encodings name their source register by number.
 * Assuming the usual R-type field layout, 0x70481038 reads $8 and
 * 0x70491038 reads $9, so they only pair up with the loads into t0/t1 when
 * <machine/asm.h> maps t0/t1 to $8/$9 -- confirm for the ABI in use.
 */
	.macro	CSUM_BIGCHUNK_AND_COPY offset
	pref	0, (\offset + 0x00)(a0)
	ld	t0, (\offset + 0x00)(a0)
	ld	t1, (\offset + 0x08)(a0)
	.word	0x70481038		/* daddwc v0, v0, t0 */
	.word	0x70491038		/* daddwc v0, v0, t1 */
	ld	t0, (\offset + 0x10)(a0)
	ld	t1, (\offset + 0x18)(a0)
	.word	0x70481038		/* daddwc v0, v0, t0 */
	.word	0x70491038		/* daddwc v0, v0, t1 */
	.endm

/*
 * small_csumcpy -- shared tail of xlr_csum_partial_nocopy.
 *
 * Entered by branch or jump only (never called); it returns straight to the
 * original caller through ra.  It sums the last few bytes (source alignment
 * unknown, fewer than 8 bytes to go), adds the partial checksum from a2 and
 * folds the accumulator.
 *
 * In:       src (a0) = next unread byte
 *           t2       = bytes left; only bits 2..0 are examined
 *           a2       = partial checksum
 *           sum (v0) = running sum
 * Out:      sum (v0) = folded result
 * Clobbers: a0, a1, t0, t1, t2, v1 (ulw/ulhu are assembler macros and may
 *           also use the assembler temporary)
 *
 * Instructions indented by one extra space sit in a branch delay slot and
 * execute whether or not the branch is taken.
 *
 * NOTE(review): t7 (odd start address, set by the caller) is not consulted
 * here, so no byte swap is applied to the result for odd-aligned buffers --
 * confirm callers never pass one.
 */
small_csumcpy:
	move	a1, t2			/* a1 = remaining length */

	andi	t0, a1, 4
	beqz	t0, 1f			/* no whole word left */
	 andi	t0, a1, 2		/* halfword test, needed on both paths */

	ulw	t1, (src)		/* Still a full word to go */
	daddiu	src, 4
	.word	0x70491038		/* daddwc v0, v0, t1 */

1:	move	t1, zero
	beqz	t0, 1f			/* no halfword left */
	 andi	t0, a1, 1		/* trailing-byte test, needed on both paths */

	ulhu	t1, (src)		/* Still a halfword to go */
	daddiu	src, 2

1:	beqz	t0, 1f			/* no trailing byte */
	 sll	t1, t1, 16		/* halfword (or zero) up to bits 31..16 */

	lbu	t2, (src)
	nop

#ifdef __MIPSEB__
	sll	t2, t2, 8		/* big-endian: lone byte is the high byte */
#endif
	or	t1, t2

1:	.word	0x70491038		/* daddwc v0, v0, t1 */

	.word	0x70461038		/* daddwc v0, v0, a2 */
	.word	0x70401038		/* daddwc v0, v0, $0 */

	/* Ideally at this point of time the status flag must be cleared */

	/* add the low 32 bits (moved to the top half) in, keep the top half */
	dsll32	v1, sum, 0
	.word	0x70431038		/* daddwc v0, v0, v1 */
	dsrl32	sum, sum, 0
	.word	0x70401038		/* daddwc v0, v0, zero */

	/* fold the checksum: 32 bits down to 16, carry added back in */
	sll	v1, sum, 16
	addu	sum, v1
	sltu	v1, sum, v1		/* v1 = carry out of the addu */
	srl	sum, sum, 16
	addu	sum, v1

	.set	reorder			/* let the assembler fill jr's delay slot */
	jr	ra
	.set	noreorder

/* ------------------------------------------------------------------ */

/*
 * xlr_csum_partial_nocopy -- checksum a buffer without copying it.
 *
 * In:       a0 (src) = buffer address
 *           a1       = length in bytes
 *           a2       = partial checksum (added in by small_csumcpy)
 * Out:      v0 (sum) = result, produced by small_csumcpy
 * Clobbers: a0, a1, t0-t3, t7, t8, v1
 *
 * Buffers shorter than 8 bytes go straight to small_csumcpy.  Otherwise the
 * pointer is advanced to halfword and then word alignment; if at least 56
 * bytes remain it is further advanced to 8-, 16- and 32-byte alignment and
 * the data is consumed in 128-, 64- and 32-byte chunks.  Remaining whole
 * words are summed one at a time and the last 0-3 bytes are left to
 * small_csumcpy, which also folds the sum and returns to the caller.
 *
 * Instructions indented by one extra space sit in a branch delay slot and
 * execute whether or not the branch is taken.
 *
 * NOTE(review): t7 records an odd start address but is only read by
 * hword_align; nothing visible byte-swaps the result for odd-aligned
 * buffers -- confirm callers never pass one.
 */
	.align	5
LEAF(xlr_csum_partial_nocopy)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy	/* < 8 bytes to copy */
	 move	t2, a1			/* length for small_csumcpy */

	beqz	a1, out			/* a1 >= 8 on this path, so never taken */
	 andi	t7, src, 0x1		/* odd buffer? */

hword_align:
	beqz	t7, word_align
	 andi	t8, src, 0x2

	lbu	t0, (src)		/* consume the leading odd byte */
	dsubu	a1, a1, 0x1
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x1
	andi	t8, src, 0x2		/* recompute: src has moved */

word_align:
	beqz	t8, dword_align
	 sltiu	t8, a1, 56		/* t8 = too short for the chunked path */

	lhu	t0, (src)
	dsubu	a1, a1, 0x2
	.word	0x70481038		/* daddwc v0, v0, t0 */
	sltiu	t8, a1, 56		/* recompute: a1 has changed */
	daddu	src, src, 0x2

dword_align:
	bnez	t8, do_end_words	/* < 56 bytes: word loop only */
	 move	t8, a1			/* do_end_words derives its count from t8 */

	andi	t8, src, 0x4
	beqz	t8, qword_align
	 andi	t8, src, 0x8

	lw	t0, 0x00(src)
	dsubu	a1, a1, 0x4
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	 andi	t8, src, 0x10

	ld	t0, 0x00(src)
	dsubu	a1, a1, 0x8
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	 dsrl	t8, a1, 0x7		/* t8 = number of 128-byte chunks */

	ld	t3, 0x08(src)
	ld	t0, 0x00(src)
	.word	0x704b1038		/* daddwc v0, v0, t3 */
	.word	0x70481038		/* daddwc v0, v0, t0 */
	dsubu	a1, a1, 0x10
	daddu	src, src, 0x10
	dsrl	t8, a1, 0x7		/* recompute: a1 has changed */

begin_movement:
	beqz	t8, 1f
	 andi	t2, a1, 0x40		/* t2 = a 64-byte chunk follows */

move_128bytes:
	pref	0, 0x20(a0)
	pref	0, 0x40(a0)
	pref	0, 0x60(a0)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	CSUM_BIGCHUNK_AND_COPY(0x20)
	CSUM_BIGCHUNK_AND_COPY(0x40)
	CSUM_BIGCHUNK_AND_COPY(0x60)
	dsubu	t8, t8, 0x01
	bnez	t8, move_128bytes	/* flag */
	 daddu	src, src, 0x80

1:
	beqz	t2, 1f
	 andi	t2, a1, 0x20		/* t2 = a 32-byte chunk follows */

move_64bytes:
	pref	0, 0x20(a0)
	pref	0, 0x40(a0)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	CSUM_BIGCHUNK_AND_COPY(0x20)
	daddu	src, src, 0x40

1:
	beqz	t2, do_end_words
	 andi	t8, a1, 0x1c		/* t8 = bytes left in whole words */

move_32bytes:
	pref	0, 0x20(a0)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	andi	t8, a1, 0x1c		/* the macro does not touch t8; same value */
	daddu	src, src, 0x20

do_end_words:
	beqz	t8, maybe_end_cruft
	 dsrl	t8, t8, 0x2		/* byte count -> word count */

end_words:
	lw	t0, (src)
	dsubu	t8, t8, 0x1
	.word	0x70481038		/* daddwc v0, v0, t0 */
	bnez	t8, end_words
	 daddu	src, src, 0x4

maybe_end_cruft:
	andi	t2, a1, 0x3		/* 0-3 trailing bytes */

	/* The tail sums what is left, folds, and returns to our caller. */
	j	small_csumcpy
	 move	a1, t2

out:
	/* Only reachable through the zero-length guard near the entry. */
	jr	ra
	 move	v0, sum
END(xlr_csum_partial_nocopy)
/* Cache object: 452f7b8fbbc53ff0e4987387e84e22c0 -- web-cache footer captured with the listing; not part of the source. */