/*-
 * Copyright (C) 2016 Cavium Inc.
 * All rights reserved.
 *
 * Developed by Semihalf.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD: releng/11.2/sys/arm64/arm64/bzero.S 297537 2016-04-04 07:11:33Z wma $");


#include "assym.s"

/*
 * void bzero(void *p, size_t size)
 *
 *  x0 - p
 *  x1 - size
 */
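
/*
 * Rough C-style sketch of the flow below (illustrative only; "line"
 * stands for the dczva_line_size value loaded later and is not a
 * symbol used by this file):
 *
 *	if (size > 16 && line != 0 && size >= line) {
 *		zero up to the next line boundary with small stores;
 *		zero one whole line per dc zva while full lines remain;
 *		zero the tail with the small-store path;
 *	} else {
 *		zero with byte/halfword/word/doubleword stores;
 *	}
 *
 * Edge cases (pointer already line-aligned, or no full line left after
 * aligning) fall back entirely to the small-store path.
 */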
ENTRY(bzero)
	cbz	x1, ending

	/*
	 * x5 is the number of cache lines to zero.  It is calculated
	 * later and becomes non-zero only if the buffer is long enough
	 * to be zeroed by cache lines (and if DC ZVA is allowed).
	 * It must be cleared here because for buffers smaller than
	 * 16 bytes it is never calculated and would otherwise hold a
	 * stale value.
	 * "normal" is used both for buffers <= 16 bytes and to align the
	 * buffer to a cache line when it is larger than one.  A non-zero
	 * x5 after "normal" completes indicates that it was used to
	 * align the buffer and that zeroing by cache lines should now be
	 * performed, with x5 giving the number of cache lines to loop
	 * through.
	 */
	mov	x5, xzr

	/* Do not use cache-assisted zeroing for buffers of size <= 16 */
	cmp	x1, #0x10
	b.le	normal

	/*
	 * Load the size of the block that is zeroed by the dc zva
	 * instruction.  A value of 0 means the instruction is not allowed.
	 */
	ldr	x7, =dczva_line_size
	ldr	x7, [x7]
	cbz	x7, normal
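	/*
	 * The two ldr instructions above first load the address of the
	 * dczva_line_size variable from the literal pool and then
	 * dereference it to obtain its value.
	 */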

	/*
	 * The buffer must be at least as large as a cache line to use
	 * cache zeroing (and cache-line aligned, but that is checked
	 * after the jump).
	 */
	cmp	x1, x7
	b.lt	normal

	/*
	 * Calculate the number of bytes to the cache-aligned address (x4)
	 * and the number of full cache lines (x5).  x6 is the final
	 * address to zero.
	 */
	sub	x2, x7, #0x01
	mov	x3, -1
	eor	x3, x3, x2
	add	x4, x0, x2
	and	x4, x4, x3
	subs	x4, x4, x0
	b.eq	normal
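	/*
	 * The sequence above builds the mask x3 = ~(line size - 1),
	 * rounds the pointer up to the next cache line boundary and
	 * subtracts the original pointer, leaving in x4 the number of
	 * leading bytes to zero before dc zva can be used.  If x4 is 0
	 * the pointer is already line aligned and the code falls back to
	 * "normal" with x5 still zero, so the whole buffer is zeroed
	 * without dc zva.
	 */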

	/* Calculate number of "lines" in buffer */
	sub	x5, x1, x4
	rbit	x2, x7
	clz	x2, x2
	lsr	x5, x5, x2
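	/*
	 * rbit followed by clz counts the trailing zero bits of x7; as
	 * the line size is a power of two, this is log2(line size), so
	 * the lsr divides the remaining length by the line size, giving
	 * the number of full cache lines in x5.
	 */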

	/*
	 * If the number of cache lines is 0, zeroing by cache lines is
	 * not possible, so take the normal path.
	 */
	cbz	x5, normal
	/* x6 is final address to zero */
	add	x6, x0, x1

	/*
	 * We get here because x5 is non-zero, so "normal" will be used
	 * to align the buffer before cache zeroing.  x4 holds the number
	 * of bytes needed for alignment.
	 */
	mov	x1, x4

	/* When jumping here: x0 holds pointer, x1 holds size */
normal:
	/*
	 * Get the buffer offset from a 16-byte aligned address; 0 means
	 * the pointer is already aligned.
	 */
	ands	x2, x0, #0x0f
	b.eq	aligned_to_16
	/*
	 * Calculate the number of one-byte stores needed to reach an
	 * 8-byte aligned address.
	 */
	ands	x2, x2, #0x07
	mov	x3, #0x08
	sub	x2, x3, x2
	/* x2 is number of bytes missing for alignment, x1 is buffer size */
	cmp	x1, x2
	csel	x2, x1, x2, le
	sub	x1, x1, x2
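	/*
	 * The csel above clamps x2 to min(size, bytes to alignment), so
	 * the byte loop below never writes past the end of a short buffer.
	 */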

	/*
	 * The byte-by-byte store loop zeroes just enough bytes to align
	 * the pointer, but never more than "size".
	 */
align:
	strb	wzr, [x0], #0x01
	subs	x2, x2, #0x01
	b.ne	align

	/* Now pointer is aligned to 8 bytes */
	cmp	x1, #0x10
	b.lt	lead_out
	/*
	 * Check whether another 8-byte store is needed to reach a 16-byte
	 * aligned address and, if so, do it.
	 */
	tbz	x0, #0x03, aligned_to_16
	str	xzr, [x0], #0x08
	sub	x1, x1, #0x08

	/* When jumping here: x0 is a 16-byte aligned address, x1 is size */
aligned_to_16:
	/* If size is less than 16 bytes, use lead_out to zero what remains */
	cmp	x1, #0x10
	b.lt	lead_out

	lsr	x2, x1, #0x04
zero_by_16:
	stp	xzr, xzr, [x0], #0x10
	subs	x2, x2, #0x01
	b.ne	zero_by_16
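	/*
	 * The loop above stores one pair of zero registers (16 bytes) per
	 * iteration; x2 was set to size / 16 by the lsr before the loop.
	 */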

	/*
	 * The lead-out requires the address to be aligned to 8 bytes.  It
	 * is used to zero buffers smaller than 16 bytes and whatever
	 * cannot be zeroed by the zero_by_16 loop.
	 */
	ands	x1, x1, #0x0f
	b.eq	lead_out_end
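	/*
	 * On entry to lead_out the remaining length is at most 15, so its
	 * bits 3..0 directly select which of the 8-, 4-, 2- and 1-byte
	 * stores below are needed; each tbz skips the corresponding store.
	 */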
lead_out:
	tbz	x1, #0x03, lead_out_dword
	str	xzr, [x0], #0x08
lead_out_dword:
	tbz	x1, #0x02, lead_out_word
	str	wzr, [x0], #0x04
lead_out_word:
	tbz	x1, #0x01, lead_out_byte
	strh	wzr, [x0], #0x02
lead_out_byte:
	tbz	x1, #0x00, lead_out_end
	strb	wzr, [x0], #0x01

lead_out_end:
	/*
	 * If x5 is non-zero, "normal" has been used as a lead-in to align
	 * the buffer address to the cache line size.
	 */
	cbz	x5, ending

	/*
	 * Here x5 holds the number of lines to zero; x6 is the final
	 * address of the buffer; x0 is a cache-line aligned pointer; x7
	 * is the cache line size in bytes.
	 */
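	/*
	 * dc zva zeroes an entire naturally aligned block of
	 * dczva_line_size bytes at the address in x0, which is why the
	 * pointer had to be aligned to the line size beforehand.
	 */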
cache_line_zero:
	dc	zva, x0
	add	x0, x0, x7
	subs	x5, x5, #0x01
	b.ne	cache_line_zero

	/* Need to zero remaining bytes? */
	subs	x1, x6, x0
	b.ne	normal
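	/*
	 * x6 - x0 is the number of tail bytes left after the last full
	 * line.  If it is non-zero, "normal" runs once more to zero the
	 * tail; x5 is now 0, so that pass ends at lead_out_end.
	 */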

ending:
	ret

END(bzero)
