/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.0/sys/arm/arm/stdatomic.c 255738 2013-09-20 20:44:32Z zbb $");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/cpufunc.h>
#include <machine/sysarch.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
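
/*
 * Illustrative sketch (added commentary, not part of the upstream
 * file): a use of the macro above, such as
 *
 *	WITHOUT_INTERRUPTS({
 *		ret = *mem;
 *		*mem += val;
 *	});
 *
 * expands to a block that saves the interrupt state, disables
 * interrupts, runs the statements exactly once, and restores the saved
 * state.  On a uniprocessor kernel that is sufficient to make the
 * enclosed read-modify-write sequence atomic, since nothing else can
 * run in between.
 */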

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif
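
/*
 * Note (added commentary, not in the original): do_sync() is what
 * gives the routines below their full-barrier behaviour.  "dmb" is the
 * ARMv7 data memory barrier instruction, and the CP15 operation
 * c7, c10, 5 is its ARMv6 encoding.  On a uniprocessor kernel a plain
 * compiler barrier suffices: no other CPU can observe an intermediate
 * ordering, and interrupt entry/exit already serializes execution.
 */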

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define	__sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N
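
/*
 * Illustration (hand-expanded, not in the original): instantiating
 * EMIT_FETCH_OP_N(4, uint32_t, fetch_add, +=) above yields
 *
 *	uint32_t
 *	__atomic_fetch_add_4(uint32_t *mem, uint32_t val, int model __unused)
 *	{
 *		uint32_t ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			ret = *mem;
 *			*mem += val;
 *		});
 *		return (ret);
 *	}
 *
 * which is the function the compiler calls for a C11
 * atomic_fetch_add_explicit() on a 32-bit object on this target.
 */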

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
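
/*
 * Sketch of the mechanism (a summary, not code from this file): the
 * two words at ARM_RAS_START hold the start and end address of the
 * sequence currently executing.  Each routine below registers itself,
 * performs an unguarded load-modify-store, and then clears the
 * registration.  If the kernel preempts the thread while its program
 * counter lies inside [start, end), it rewinds the program counter to
 * the start label, so the sequence always reruns from the top instead
 * of resuming half-way with a stale value.
 */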

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		    "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)			\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef	EMIT_ALL_OPS_N
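
/*
 * Usage illustration (an assumption about compiler lowering, not part
 * of the original): on a target without ldrex/strex, code such as
 *
 *	_Atomic uint32_t cnt;
 *	uint32_t prev = atomic_exchange_explicit(&cnt, 0,
 *	    memory_order_relaxed);
 *
 * is lowered by the compiler to a call to __atomic_exchange_4(), which
 * the RAS-based definition above provides.
 */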

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;
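
/*
 * Illustration (added commentary, not in the original): reg_t lets the
 * routines below treat one 32-bit word either as a whole register
 * (v32) or as four individually addressable byte lanes (v8[0..3]), so
 * an 8- or 16-bit value can be placed at exactly the byte offset it
 * occupies in memory.
 */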

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
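
/*
 * Worked example (not in the original): for a uint16_t at address
 * 0x20001006, round_to_word() masks off the low two bits and returns
 * 0x20001004, the aligned 32-bit word that ldrex/strex will operate
 * on.
 */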

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t	in;
		uint8_t		out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t		in[2];
		uint16_t	out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
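
/*
 * Worked example (not in the original): continuing the case above, a
 * uint16_t at 0x20001006 has offset 2 within its word, so put_2()
 * writes the two bytes of the value into r->v8[2] and r->v8[3], and
 * get_2() reassembles them from the same lanes.  Because the byte
 * lanes are addressed the same way a plain 16-bit access would address
 * memory, the code works on both little- and big-endian
 * configurations.
 */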

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)					\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
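
/*
 * Worked example (not in the original, assuming a little-endian
 * configuration): exchanging the byte at offset 1 of a word sets up
 * val32.v32 = 0x0000nn00 (the new byte shifted into its lane) and
 * negmask.v32 = 0xffff00ff (every lane preserved except the target
 * one).  The loop above then computes
 *
 *	new = (old & negmask) | val32
 *
 * inside an ldrex/strex pair, so only the addressed byte changes, and
 * the store is retried until no other CPU intervened.
 */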

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */\
		"\tbne 2f\n"		/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		    "=&r" (temp2)					\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		    "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)					\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		    "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)					\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		    "=&r" (temp2)					\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
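
/*
 * Usage sketch (illustrative, not part of the original): the canonical
 * consumer of __sync_lock_test_and_set() is a simple spinlock, e.g.
 *
 *	static volatile uint32_t lock;
 *
 *	while (__sync_lock_test_and_set(&lock, 1) != 0)
 *		continue;
 *	.. critical section ..
 *	__sync_lock_release(&lock);
 *
 * A call on a uint32_t resolves to __sync_lock_test_and_set_4 above.
 * Note that this file makes the operation a full barrier, although the
 * GCC documentation only promises an acquire barrier (see the comment
 * near the top of the file).
 */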

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N
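
/*
 * Illustration (hand-expanded, not in the original): instantiating
 * EMIT_VAL_COMPARE_AND_SWAP_N(4, uint32_t) above yields
 *
 *	uint32_t
 *	__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
 *	    uint32_t desired)
 *	{
 *		uint32_t ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			ret = *mem;
 *			if (*mem == expected)
 *				*mem = desired;
 *		});
 *		return (ret);
 *	}
 *
 * matching the usual CAS contract: the previous value is returned, and
 * the store only happens when that value equalled the expected one.
 */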

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		    "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */