1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/stdatomic.h>
34 #include <sys/types.h>
35
36 #include <machine/atomic.h>
37 #include <machine/cpufunc.h>
38 #include <machine/sysarch.h>
39
40 /*
41 * Executing statements with interrupts disabled.
42 */
43
#if defined(_KERNEL) && !defined(SMP)
/*
 * WITHOUT_INTERRUPTS(s): execute the statement list `s` with interrupts
 * disabled, restoring the previous interrupt state afterwards.  On a
 * uniprocessor kernel this is sufficient to make a short sequence of
 * memory operations atomic with respect to everything else on the CPU.
 *
 * NOTE(review): not referenced anywhere in the visible portion of this
 * file; presumably used by conditionally-compiled code or kept for
 * parity with other ports — confirm before removing.
 */
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);	/* Force `s` to be a single statement. */	\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
53
54 /*
55 * Memory barriers.
56 *
57 * It turns out __sync_synchronize() does not emit any code when used
58 * with GCC 4.2. Implement our own version that does work reliably.
59 *
60 * Although __sync_lock_test_and_set() should only perform an acquire
61 * barrier, make it do a full barrier like the other functions. This
62 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
63 */
64
#if defined(_KERNEL) && !defined(SMP)
/*
 * Full memory barrier, uniprocessor kernel flavor: no other CPU can
 * observe ordering, so a compiler barrier (which only prevents the
 * compiler from reordering memory accesses across this point) suffices.
 */
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#else
/*
 * Full memory barrier, userland/SMP flavor: issue a Data Memory
 * Barrier so the ordering is also visible to other CPUs.
 */
static inline void
do_sync(void)
{

	dmb();
}
#endif
80
81
82 #if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)
83
#ifdef __clang__
/*
 * The functions below are defined with a `_c` suffix so their names do
 * not collide with the compiler builtins of the same name.  Under
 * clang, rename them at the symbol level back to the canonical
 * __sync_* names via redefine_extname; under GCC the same effect is
 * achieved with __strong_reference() at the bottom of this file.
 */
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif
107
108 /*
109 * Old __sync_* API.
110 */
111
112
113 /* Implementations for old GCC versions, lacking support for atomics. */
114
/*
 * A 32-bit word viewed either whole (v32) or as its four constituent
 * bytes (v8).  Used to construct masks and to place/extract 8- and
 * 16-bit quantities at the correct byte offset inside the aligned
 * 32-bit word that contains them; byte order follows the CPU's native
 * endianness, matching what ldrex/strex operate on.
 */
typedef union {
	uint8_t v8[4];
	uint32_t v32;
} reg_t;
119
120 /*
121 * Given a memory address pointing to an 8-bit or 16-bit integer, return
122 * the address of the 32-bit word containing it.
123 */
124
/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 *
 * The mask is applied to a uintptr_t rather than an intptr_t: bitwise
 * operations on a (potentially negative) signed integer have
 * implementation-defined representation semantics, while on the
 * unsigned type clearing the low two bits is fully defined.
 */
static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((uintptr_t)ptr & ~(uintptr_t)3));
}
131
132 /*
133 * Utility functions for loading and storing 8-bit and 16-bit integers
134 * in 32-bit words at an offset corresponding with the location of the
135 * atomic variable.
136 */
137
138 static inline void
139 put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
140 {
141 size_t offset;
142
143 offset = (intptr_t)offset_ptr & 3;
144 r->v8[offset] = val;
145 }
146
147 static inline uint8_t
148 get_1(const reg_t *r, const uint8_t *offset_ptr)
149 {
150 size_t offset;
151
152 offset = (intptr_t)offset_ptr & 3;
153 return (r->v8[offset]);
154 }
155
156 static inline void
157 put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
158 {
159 size_t offset;
160 union {
161 uint16_t in;
162 uint8_t out[2];
163 } bytes;
164
165 offset = (intptr_t)offset_ptr & 3;
166 bytes.in = val;
167 r->v8[offset] = bytes.out[0];
168 r->v8[offset + 1] = bytes.out[1];
169 }
170
171 static inline uint16_t
172 get_2(const reg_t *r, const uint16_t *offset_ptr)
173 {
174 size_t offset;
175 union {
176 uint8_t in[2];
177 uint16_t out;
178 } bytes;
179
180 offset = (intptr_t)offset_ptr & 3;
181 bytes.in[0] = r->v8[offset];
182 bytes.in[1] = r->v8[offset + 1];
183 return (bytes.out);
184 }
185
186 /*
187 * 8-bit and 16-bit routines.
188 *
189 * These operations are not natively supported by the CPU, so we use
190 * some shifting and bitmasking on top of the 32-bit instructions.
191 */
192
/*
 * __sync_lock_test_and_set_{1,2}: atomically exchange the 8/16-bit
 * value at `mem` with `val` and return the previous value.
 *
 * The sub-word operand is widened into its containing aligned 32-bit
 * word: val32 holds the new value at the correct byte offset (other
 * bytes zero) and negmask is all-ones except at the target bytes.
 * The ldrex/strex loop then splices the new bytes into the word while
 * leaving the neighboring bytes intact.  do_sync() is issued first so
 * this acts as a full barrier (see the comment block above).
 */
#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)					\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
224
/*
 * __sync_val_compare_and_swap_{1,2}: atomically compare the 8/16-bit
 * value at `mem` against `expected` and, if equal, replace it with
 * `desired`; the previous value is returned either way.
 *
 * posmask has all-ones at the target bytes (and zeroes elsewhere);
 * negmask is its complement.  Inside the loop the target bytes of the
 * loaded word are isolated and compared, and on a match the new bytes
 * are spliced in with the surrounding bytes preserved.  A failed
 * comparison exits via label 2 without storing.
 *
 * NOTE(review): the first output operand here binds the whole reg_t
 * union ("=&r" (old)) whereas the other macros in this file bind
 * old.v32 — the union is 4 bytes so it fits a register, but the
 * inconsistency is worth confirming against upstream.
 */
#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */\
		"\tbne 2f\n"		/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		    "=&r" (temp2)					\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		    "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
265
/*
 * __sync_fetch_and_{add,sub}_{1,2}: atomically apply an arithmetic
 * operation to the 8/16-bit value at `mem` and return the previous
 * value.
 *
 * The arithmetic is performed on the whole containing word; any
 * carry/borrow that bleeds into the neighboring bytes is discarded by
 * masking the result with posmask (ones at the target bytes), after
 * which the untouched neighboring bytes of the original word are
 * merged back in via negmask.
 */
#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)					\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		    "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")
303
/*
 * __sync_fetch_and_{and,or,xor}_{1,2}: atomically apply a bitwise
 * operation to the 8/16-bit value at `mem` and return the previous
 * value.
 *
 * Bitwise ops never spill into neighboring bytes, so no masking loop
 * is needed: the neighbor bytes of the operand word are simply filled
 * with the operation's identity element (`idempotence`: all-ones for
 * AND, zeroes for OR/XOR) so applying the op to the whole word leaves
 * them unchanged.
 */
#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)					\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
336
337 /*
338 * 32-bit routines.
339 */
340
/*
 * Atomically exchange the 32-bit value at `mem` with `val` and return
 * the previous value.  The word size is natively supported by
 * ldrex/strex, so no masking is required; do_sync() upgrades this to a
 * full barrier (see the comment block at the top of the file).
 */
uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}
357
/*
 * Atomically compare the 32-bit value at `mem` against `expected` and,
 * if equal, store `desired`; the previous value is returned either
 * way.  An unequal comparison branches to label 2 without storing,
 * which also leaves the exclusive monitor without a strex (permitted;
 * the monitor is simply abandoned).
 */
uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}
378
/*
 * __sync_fetch_and_{add,and,or,sub,xor}_4: atomically apply `op` to
 * the 32-bit value at `mem` and return the previous value.  A single
 * ldrex/op/strex loop suffices at native word size.
 */
#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		    "=&r" (temp2)					\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
404
#ifndef __clang__
/*
 * GCC path: alias each `_c`-suffixed implementation to the canonical
 * __sync_* symbol.  This mirrors the clang #pragma redefine_extname
 * block near the top of the file.
 */
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif
428
429 #endif /* __SYNC_ATOMICS */
Cache object: 071cf64d2574db212b0168d51f3e02d3
|