/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/stdatomic.h>
#include <sys/types.h>

#ifndef _KERNEL
#include <stdbool.h>
#endif /* !_KERNEL */

#if defined(__SYNC_ATOMICS)

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

static inline void
do_sync(void)
{

        __asm volatile (
#if !defined(_KERNEL) || defined(SMP)
                ".set noreorder\n"
                "\tsync\n"
                "\tnop\n"
                "\tnop\n"
                "\tnop\n"
                "\tnop\n"
                "\tnop\n"
                "\tnop\n"
                "\tnop\n"
                "\tnop\n"
                ".set reorder\n"
#else /* _KERNEL && !SMP */
                ""
#endif /* !_KERNEL || SMP */
                : : : "memory");
}
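
/*
 * Illustrative mapping (hedged; it depends on how the compiler lowers
 * C11 atomics): a seq_cst exchange from <stdatomic.h> may end up in
 * the __sync_* routines below, which is why __sync_lock_test_and_set()
 * is given a full barrier here rather than the acquire-only semantics
 * the GCC builtin documentation nominally promises:
 *
 *	_Atomic(uint32_t) word;
 *	uint32_t prev;
 *
 *	prev = atomic_exchange_explicit(&word, 1, memory_order_seq_cst);
 *	// may lower to: prev = __sync_lock_test_and_set_4(&word, 1);
 */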

/*
 * A 32-bit register image. The v8[] member overlays the bytes of v32
 * in memory order, so v8[addr & 3] names the byte that address addr
 * refers to, regardless of endianness.
 */
typedef union {
        uint8_t v8[4];
        uint32_t v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

        return ((uint32_t *)((intptr_t)ptr & ~3));
}
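
/*
 * Example (illustrative addresses): a uint8_t at 0x80000403 rounds to
 * the word at 0x80000400 and occupies offset 3 within that word; a
 * uint16_t at 0x80000406 rounds to the word at 0x80000404 and occupies
 * offsets 2 and 3. The low two address bits select the byte lane.
 */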

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
        size_t offset;
        union {
                uint16_t in;
                uint8_t out[2];
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in = val;
        r->v8[offset] = bytes.out[0];
        r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
        size_t offset;
        union {
                uint8_t in[2];
                uint16_t out;
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in[0] = r->v8[offset];
        bytes.in[1] = r->v8[offset + 1];
        return (bytes.out);
}
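
/*
 * Sketch of how these compose (hypothetical values): storing 0x1234
 * into the halfword at offset 2 of a word goes through a 2-byte union,
 * so the value keeps its native byte order within the word image:
 *
 *	reg_t r;
 *
 *	r.v32 = 0x00000000;
 *	put_2(&r, (uint16_t *)0x80000406, 0x1234);
 *	// r.v8[2] and r.v8[3] now hold the bytes of 0x1234 in memory
 *	// order; get_2(&r, (uint16_t *)0x80000406) returns 0x1234.
 */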

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \
uintN_t \
__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val) \
{ \
        uint32_t *mem32; \
        reg_t val32, negmask, old; \
        uint32_t temp; \
\
        mem32 = round_to_word(mem); \
        val32.v32 = 0x00000000; \
        put_##N(&val32, mem, val); \
        negmask.v32 = 0xffffffff; \
        put_##N(&negmask, mem, 0); \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tll %0, %5\n" /* Load old value. */ \
                "\tand %2, %4, %0\n" /* Remove the old value. */ \
                "\tor %2, %3\n" /* Put in the new value. */ \
                "\tsc %2, %1\n" /* Attempt to store. */ \
                "\tbeqz %2, 1b\n" /* Spin if failed. */ \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp) \
                : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
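
/*
 * Worked example (illustrative, assuming a big-endian word and a byte
 * at offset 1): __sync_lock_test_and_set_1(mem, 0x5a) builds
 *
 *	val32.v32   = 0x005a0000	(new byte in its lane, rest zero)
 *	negmask.v32 = 0xff00ffff	(all ones, target lane cleared)
 *
 * so inside the LL/SC loop "and %2, negmask, old" keeps the three
 * neighbouring bytes and "or %2, val32" drops the new byte into place
 * before the whole word is stored back.
 */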

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
uintN_t \
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \
    uintN_t desired) \
{ \
        uint32_t *mem32; \
        reg_t expected32, desired32, posmask, old; \
        uint32_t negmask, temp; \
\
        mem32 = round_to_word(mem); \
        expected32.v32 = 0x00000000; \
        put_##N(&expected32, mem, expected); \
        desired32.v32 = 0x00000000; \
        put_##N(&desired32, mem, desired); \
        posmask.v32 = 0x00000000; \
        put_##N(&posmask, mem, ~0); \
        negmask = ~posmask.v32; \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tll %0, %7\n" /* Load old value. */ \
                "\tand %2, %5, %0\n" /* Isolate the old value. */ \
                "\tbne %2, %3, 2f\n" /* Compare to expected value. */ \
                "\tand %2, %6, %0\n" /* Remove the old value. */ \
                "\tor %2, %4\n" /* Put in the new value. */ \
                "\tsc %2, %1\n" /* Attempt to store. */ \
                "\tbeqz %2, 1b\n" /* Spin if failed. */ \
                "2:" \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp) \
                : "r" (expected32.v32), "r" (desired32.v32), \
                  "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
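
/*
 * Illustrative note (hypothetical values, big-endian, byte at offset
 * 1): posmask.v32 = 0x00ff0000 and negmask = 0xff00ffff, so the loop
 * compares only the target lane ("and %2, posmask, %0") against
 * expected32 and, on a match, splices desired32 into the word it
 * stores back, leaving the other three bytes untouched.
 */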

#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \
uintN_t \
__sync_##name##_##N(uintN_t *mem, uintN_t val) \
{ \
        uint32_t *mem32; \
        reg_t val32, posmask, old; \
        uint32_t negmask, temp1, temp2; \
\
        mem32 = round_to_word(mem); \
        val32.v32 = 0x00000000; \
        put_##N(&val32, mem, val); \
        posmask.v32 = 0x00000000; \
        put_##N(&posmask, mem, ~0); \
        negmask = ~posmask.v32; \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tll %0, %7\n" /* Load old value. */ \
                "\t"op" %2, %0, %4\n" /* Calculate new value. */ \
                "\tand %2, %5\n" /* Isolate the new value. */ \
                "\tand %3, %6, %0\n" /* Remove the old value. */ \
                "\tor %2, %3\n" /* Put in the new value. */ \
                "\tsc %2, %1\n" /* Attempt to store. */ \
                "\tbeqz %2, 1b\n" /* Spin if failed. */ \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
                  "=&r" (temp2) \
                : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \
                  "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "addu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "subu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "addu")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "subu")

#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \
uintN_t \
__sync_##name##_##N(uintN_t *mem, uintN_t val) \
{ \
        uint32_t *mem32; \
        reg_t val32, old; \
        uint32_t temp; \
\
        mem32 = round_to_word(mem); \
        val32.v32 = idempotence ? 0xffffffff : 0x00000000; \
        put_##N(&val32, mem, val); \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tll %0, %4\n" /* Load old value. */ \
                "\t"op" %2, %3, %0\n" /* Calculate new value. */ \
                "\tsc %2, %1\n" /* Attempt to store. */ \
                "\tbeqz %2, 1b\n" /* Spin if failed. */ \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp) \
                : "r" (val32.v32), "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "or", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "xor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "or", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "xor", 0)
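
/*
 * Usage sketch (hypothetical flag variable): setting a bit in a shared
 * byte without disturbing its word-mates. The "idempotence" argument
 * above pads the unused lanes with the operation's identity value
 * (0xff for and, 0x00 for or/xor), so applying the operation to the
 * whole word leaves the neighbouring bytes unchanged:
 *
 *	static uint8_t flags;
 *	uint8_t prev;
 *
 *	prev = __sync_fetch_and_or_1(&flags, 0x01);
 *	if ((prev & 0x01) == 0) {
 *		// we are the thread that set the flag
 *	}
 */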

/*
 * 32-bit routines.
 */

static inline uint32_t
do_compare_and_swap_4(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tll %0, %5\n" /* Load old value. */
                "\tbne %0, %3, 2f\n" /* Compare to expected value. */
                "\tmove %2, %4\n" /* Value to store. */
                "\tsc %2, %1\n" /* Attempt to store. */
                "\tbeqz %2, 1b\n" /* Spin if failed. */
                "2:"
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (expected), "r" (desired), "m" (*mem));
        return (old);
}

uint32_t
__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{

        return (do_compare_and_swap_4(mem, expected, desired));
}

bool
__sync_bool_compare_and_swap_4(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{

        return (do_compare_and_swap_4(mem, expected, desired) ==
            expected);
}

#define EMIT_FETCH_AND_OP_4(name, op) \
uint32_t \
__sync_##name##_4(uint32_t *mem, uint32_t val) \
{ \
        uint32_t old, temp; \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tll %0, %4\n" /* Load old value. */ \
                "\t"op"\n" /* Calculate new value. */ \
                "\tsc %2, %1\n" /* Attempt to store. */ \
                "\tbeqz %2, 1b\n" /* Spin if failed. */ \
                : "=&r" (old), "=m" (*mem), "=&r" (temp) \
                : "r" (val), "m" (*mem)); \
        return (old); \
}

EMIT_FETCH_AND_OP_4(lock_test_and_set, "move %2, %3")
EMIT_FETCH_AND_OP_4(fetch_and_add, "addu %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_or, "or %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "subu %2, %0, %3")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "xor %2, %0, %3")
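
/*
 * Usage sketch (illustrative, not part of the exported API): a minimal
 * test-and-set spinlock built on the 32-bit routines above. Because
 * __sync_lock_test_and_set_4() performs a full barrier here (see the
 * comment above do_sync()), no extra acquire fence is needed; the
 * release side pairs a barrier with a plain store:
 *
 *	static uint32_t lock;
 *
 *	while (__sync_lock_test_and_set_4(&lock, 1) != 0)
 *		;			// spin until the old value was 0
 *	// ... critical section ...
 *	do_sync();			// order prior accesses before release
 *	lock = 0;
 */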

/*
 * 64-bit routines.
 *
 * Note: All the 64-bit atomic operations are only atomic when running
 * in 64-bit mode. It is assumed that code compiled for n32 and n64 fits
 * into this definition and no further safeties are needed.
 */

#if defined(__mips_n32) || defined(__mips_n64)

uint64_t
__sync_val_compare_and_swap_8(uint64_t *mem, uint64_t expected,
    uint64_t desired)
{
        uint64_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tlld %0, %5\n" /* Load old value. */
                "\tbne %0, %3, 2f\n" /* Compare to expected value. */
                "\tmove %2, %4\n" /* Value to store. */
                "\tscd %2, %1\n" /* Attempt to store. */
                "\tbeqz %2, 1b\n" /* Spin if failed. */
                "2:"
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (expected), "r" (desired), "m" (*mem));
        return (old);
}

#define EMIT_FETCH_AND_OP_8(name, op) \
uint64_t \
__sync_##name##_8(uint64_t *mem, uint64_t val) \
{ \
        uint64_t old, temp; \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tlld %0, %4\n" /* Load old value. */ \
                "\t"op"\n" /* Calculate new value. */ \
                "\tscd %2, %1\n" /* Attempt to store. */ \
                "\tbeqz %2, 1b\n" /* Spin if failed. */ \
                : "=&r" (old), "=m" (*mem), "=&r" (temp) \
                : "r" (val), "m" (*mem)); \
        return (old); \
}

EMIT_FETCH_AND_OP_8(lock_test_and_set, "move %2, %3")
EMIT_FETCH_AND_OP_8(fetch_and_add, "daddu %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_and, "and %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_or, "or %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_sub, "dsubu %2, %0, %3")
EMIT_FETCH_AND_OP_8(fetch_and_xor, "xor %2, %0, %3")

#endif /* __mips_n32 || __mips_n64 */

#endif /* __SYNC_ATOMICS */