/*
 * Out-of-line atomic operations for 32-bit ARM: the C11 __atomic_*
 * helpers and the older GCC __sync_* builtins, which compilers emit
 * calls to when an atomic operation cannot be inlined.  Derived from
 * FreeBSD's sys/arm/arm/stdatomic.c.  intr_disable()/intr_restore()
 * and ARM_RAS_START are expected from the platform's machine headers
 * (<machine/cpufunc.h> and <machine/sysarch.h> on FreeBSD).
 */

#include <stdint.h>
#include <sys/cdefs.h>

/* Always build the __sync_* implementations in the second half of this file. */
#define __SYNC_ATOMICS

/* Symbol aliasing helper, mirroring FreeBSD's <sys/cdefs.h> definition. */
#define __strong_reference(sym,aliassym) \
extern __typeof (sym) aliassym __attribute__ ((__alias__ (#sym)))

#include <sys/param.h>
#include <sys/types.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif
/*
 * Executing statements with interrupts disabled.  On a uniprocessor
 * kernel this is enough to make a short load/modify/store sequence
 * atomic, since interrupt handlers are the only possible preemptors.
 */
#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
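
/*
 * Illustrative sketch (compiled out): every interrupt-based helper
 * below follows this shape, shown here as a hypothetical 32-bit
 * fetch-and-add that is not part of this file's API.
 */
#if 0
static uint32_t
example_fetch_add_32(uint32_t *mem, uint32_t val)
{
	uint32_t ret;

	WITHOUT_INTERRUPTS({
		ret = *mem;
		*mem += val;
	});
	return (ret);
}
#endif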
/*
 * Memory barriers.  do_sync() issues a full barrier: a compiler-only
 * barrier suffices on uniprocessor kernels, ARMv7 has the dedicated
 * DMB instruction, and ARMv6 uses the equivalent CP15 c7/c10/5
 * operation, which predates the dedicated instruction.
 */
#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif
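
/*
 * Illustrative sketch (compiled out): a full barrier is what lets a
 * producer publish data before raising a flag; the hypothetical helper
 * below is not part of this file's API.
 */
#if 0
static void
example_publish(uint32_t *data, uint32_t *flag, uint32_t v)
{

	*data = v;
	do_sync();	/* order the data store before the flag store */
	*flag = 1;
}
#endif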
#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* On ARMv6 and newer the compiler can inline these operations itself. */

#else /* ARMv5 and older */

/* Clang does not allow a builtin to be reimplemented without this rename. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif
/*
 * A barrier can be a no-op here: the ARMv5 and older systems handled by
 * this branch are uniprocessor, so there is no other observer to order
 * memory accesses for.
 */
void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor kernels, the atomic operations are implemented by
 * disabling interrupts around a plain load/modify/store sequence.
 */
#define EMIT_LOAD_N(N, uintN_t) \
uintN_t \
__atomic_load_##N(uintN_t *mem, int model __unused) \
{ \
uintN_t ret; \
\
WITHOUT_INTERRUPTS({ \
ret = *mem; \
}); \
return (ret); \
}
#define EMIT_STORE_N(N, uintN_t) \
void \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
\
WITHOUT_INTERRUPTS({ \
*mem = val; \
}); \
}
#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \
_Bool \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected, \
uintN_t desired, int success __unused, int failure __unused) \
{ \
_Bool ret; \
\
WITHOUT_INTERRUPTS({ \
if (*mem == *expected) { \
*mem = desired; \
ret = 1; \
} else { \
*expected = *mem; \
ret = 0; \
} \
}); \
return (ret); \
}
#define EMIT_FETCH_OP_N(N, uintN_t, name, op) \
uintN_t \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
uintN_t ret; \
\
WITHOUT_INTERRUPTS({ \
ret = *mem; \
*mem op val; \
}); \
return (ret); \
}
#define EMIT_ALL_OPS_N(N, uintN_t) \
EMIT_LOAD_N(N, uintN_t) \
EMIT_STORE_N(N, uintN_t) \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \
EMIT_FETCH_OP_N(N, uintN_t, exchange, =) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)
EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N
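
/*
 * Illustrative sketch (compiled out): with the helpers above in place,
 * C11 atomics that the compiler cannot inline are lowered to
 * out-of-line calls, e.g. the fetch-add below typically becomes a call
 * to __atomic_fetch_add_8(n, 1, __ATOMIC_SEQ_CST).
 */
#if 0
#include <stdatomic.h>

static uint64_t
example_increment(_Atomic uint64_t *n)
{

	return (atomic_fetch_add(n, 1));
}
#endif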
#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, the atomic operations are
 * implemented with a Restartable Atomic Sequence (RAS): the sequence's
 * start and end addresses are published at ARM_RAS_START, and a thread
 * preempted inside that window is restarted at the beginning by the
 * kernel, which makes the short load/store sequence effectively atomic.
 */
/*
 * Plain aligned loads and stores of at most 32 bits are already atomic
 * with respect to preemption, so no RAS window is needed for them.
 */
#define EMIT_LOAD_N(N, uintN_t) \
uintN_t \
__atomic_load_##N(uintN_t *mem, int model __unused) \
{ \
\
return (*mem); \
}
#define EMIT_STORE_N(N, uintN_t) \
void \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
\
*mem = val; \
}
#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \
uintN_t \
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
uint32_t old, temp, ras_start; \
\
ras_start = ARM_RAS_START; \
__asm volatile ( \
	/* Set up Restartable Atomic Sequence. */		\
	"1:"							\
	"\tadr   %2, 1b\n"					\
	"\tstr   %2, [%5]\n"					\
	"\tadr   %2, 2f\n"					\
	"\tstr   %2, [%5, #4]\n"				\
								\
	"\t"ldr" %0, %4\n"	/* Load old value. */		\
	"\t"str" %3, %1\n"	/* Store new value. */		\
								\
	/* Tear down Restartable Atomic Sequence. */		\
	"2:"							\
	"\tmov   %2, #0x00000000\n"				\
	"\tstr   %2, [%5]\n"					\
	"\tmov   %2, #0xffffffff\n"				\
	"\tstr   %2, [%5, #4]\n"				\
: "=&r" (old), "=m" (*mem), "=&r" (temp) \
: "r" (val), "m" (*mem), "r" (ras_start)); \
return (old); \
}
#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \
_Bool \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected, \
uintN_t desired, int success __unused, int failure __unused) \
{ \
uint32_t expected, old, temp, ras_start; \
\
expected = *pexpected; \
ras_start = ARM_RAS_START; \
__asm volatile ( \
	/* Set up Restartable Atomic Sequence. */		\
	"1:"							\
	"\tadr   %2, 1b\n"					\
	"\tstr   %2, [%6]\n"					\
	"\tadr   %2, 2f\n"					\
	"\tstr   %2, [%6, #4]\n"				\
								\
	"\t"ldr" %0, %5\n"	/* Load old value. */		\
	"\tcmp   %0, %3\n"	/* Compare to expected value. */\
	"\t"streq" %4, %1\n"	/* Store new value if equal. */	\
								\
	/* Tear down Restartable Atomic Sequence. */		\
	"2:"							\
	"\tmov   %2, #0x00000000\n"				\
	"\tstr   %2, [%6]\n"					\
	"\tmov   %2, #0xffffffff\n"				\
	"\tstr   %2, [%6, #4]\n"				\
: "=&r" (old), "=m" (*mem), "=&r" (temp) \
: "r" (expected), "r" (desired), "m" (*mem), \
"r" (ras_start)); \
if (old == expected) { \
return (1); \
} else { \
*pexpected = old; \
return (0); \
} \
}
#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op) \
uintN_t \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
uint32_t old, temp, ras_start; \
\
ras_start = ARM_RAS_START; \
__asm volatile ( \
	/* Set up Restartable Atomic Sequence. */		\
	"1:"							\
	"\tadr   %2, 1b\n"					\
	"\tstr   %2, [%5]\n"					\
	"\tadr   %2, 2f\n"					\
	"\tstr   %2, [%5, #4]\n"				\
								\
	"\t"ldr" %0, %4\n"	/* Load old value. */		\
	"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
	"\t"str" %2, %1\n"	/* Store new value. */		\
								\
	/* Tear down Restartable Atomic Sequence. */		\
	"2:"							\
	"\tmov   %2, #0x00000000\n"				\
	"\tstr   %2, [%5]\n"					\
	"\tmov   %2, #0xffffffff\n"				\
	"\tstr   %2, [%5, #4]\n"				\
: "=&r" (old), "=m" (*mem), "=&r" (temp) \
: "r" (val), "m" (*mem), "r" (ras_start)); \
return (old); \
}
#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \
EMIT_LOAD_N(N, uintN_t) \
EMIT_STORE_N(N, uintN_t) \
EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add") \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and") \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr") \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub") \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef EMIT_ALL_OPS_N
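
/*
 * Worked trace of the RAS protocol above (illustrative): on entry, the
 * addresses of labels 1 and 2 are stored at ARM_RAS_START and
 * ARM_RAS_START + 4, arming the window.  If the thread is preempted
 * with its PC between the two labels, the kernel rewinds the PC to
 * label 1, so the load/store pair re-executes from scratch; the final
 * stores of 0 and 0xffffffff disarm the window.
 */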
#endif /* _KERNEL */

#endif /* ARMv6+ */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */
#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

/*
 * Old __sync_* API.
 *
 * The functions below are implemented under a _c suffix and then
 * renamed (Clang) or aliased (GCC, via __strong_reference further
 * down) to the builtin names, since Clang refuses to let a builtin be
 * reimplemented under its own name.
 */
#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for ARMv6 and newer, based on ldrex/strex loops. */

/*
 * Manipulating objects smaller than 32 bits: baseline ARMv6 only has
 * word-sized ldrex/strex, so 8- and 16-bit atomics operate on the
 * containing aligned word and mask in the bytes that actually change.
 */
typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;
/* Round an address down to the start of its aligned 32-bit word. */
static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
/*
 * put_N()/get_N() insert or extract an N-byte value at its byte lane
 * within the containing word; indexing through the union keeps this
 * correct for either byte order.
 */
static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
size_t offset;
offset = (intptr_t)offset_ptr & 3;
r->v8[offset] = val;
}
static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
size_t offset;
offset = (intptr_t)offset_ptr & 3;
return (r->v8[offset]);
}
static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
size_t offset;
union {
uint16_t in;
uint8_t out[2];
} bytes;
offset = (intptr_t)offset_ptr & 3;
bytes.in = val;
r->v8[offset] = bytes.out[0];
r->v8[offset + 1] = bytes.out[1];
}
static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
size_t offset;
union {
uint8_t in[2];
uint16_t out;
} bytes;
offset = (intptr_t)offset_ptr & 3;
bytes.in[0] = r->v8[offset];
bytes.in[1] = r->v8[offset + 1];
return (bytes.out);
}
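
/*
 * Worked example (little-endian): for a uint8_t at address 0x1003,
 * round_to_word() selects the word at 0x1000 and the byte lives in
 * lane v8[3].  Exchanging in 0xab builds val32.v32 == 0xab000000 and
 * negmask.v32 == 0x00ffffff in the macro below, so
 * (old & negmask) | val32 replaces only the target byte.
 */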
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \
uintN_t \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \
{ \
uint32_t *mem32; \
reg_t val32, negmask, old; \
uint32_t temp1, temp2; \
\
mem32 = round_to_word(mem); \
val32.v32 = 0x00000000; \
put_##N(&val32, mem, val); \
negmask.v32 = 0xffffffff; \
put_##N(&negmask, mem, 0); \
\
do_sync(); \
__asm volatile ( \
	"1:"							\
	"\tldrex %0, %6\n"	/* Load old value. */		\
	"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
	"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
	"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
	"\tcmp   %3, #0\n"	/* Did it succeed? */		\
	"\tbne   1b\n"		/* Spin if failed. */		\
: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
"=&r" (temp2) \
: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \
return (get_##N(&old, mem)); \
}
EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
uintN_t \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \
uintN_t desired) \
{ \
uint32_t *mem32; \
reg_t expected32, desired32, posmask, old; \
uint32_t negmask, temp1, temp2; \
\
mem32 = round_to_word(mem); \
expected32.v32 = 0x00000000; \
put_##N(&expected32, mem, expected); \
desired32.v32 = 0x00000000; \
put_##N(&desired32, mem, desired); \
posmask.v32 = 0x00000000; \
put_##N(&posmask, mem, ~0); \
negmask = ~posmask.v32; \
\
do_sync(); \
__asm volatile ( \
	"1:"							\
	"\tldrex %0, %8\n"	/* Load old value. */		\
	"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
	"\tcmp   %2, %4\n"	/* Compare to expected value. */\
	"\tbne   2f\n"		/* Values are unequal. */	\
	"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
	"\torr   %2, %5\n"	/* Put in the new value. */	\
	"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
	"\tcmp   %3, #0\n"	/* Did it succeed? */		\
	"\tbne   1b\n"		/* Spin if failed. */		\
	"2:"							\
: "=&r" (old), "=m" (*mem32), "=&r" (temp1), \
"=&r" (temp2) \
: "r" (expected32.v32), "r" (desired32.v32), \
"r" (posmask.v32), "r" (negmask), "m" (*mem32)); \
return (get_##N(&old, mem)); \
}
EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
uint32_t *mem32; \
reg_t val32, posmask, old; \
uint32_t negmask, temp1, temp2; \
\
mem32 = round_to_word(mem); \
val32.v32 = 0x00000000; \
put_##N(&val32, mem, val); \
posmask.v32 = 0x00000000; \
put_##N(&posmask, mem, ~0); \
negmask = ~posmask.v32; \
\
do_sync(); \
__asm volatile ( \
	"1:"							\
	"\tldrex %0, %7\n"	/* Load old value. */		\
	"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
	"\tand   %2, %5\n"	/* Isolate the new value. */	\
	"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
	"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
	"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
	"\tcmp   %3, #0\n"	/* Did it succeed? */		\
	"\tbne   1b\n"		/* Spin if failed. */		\
: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
"=&r" (temp2) \
: "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \
"m" (*mem32)); \
return (get_##N(&old, mem)); \
}
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")
#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
uint32_t *mem32; \
reg_t val32, old; \
uint32_t temp1, temp2; \
\
mem32 = round_to_word(mem); \
/*								\
 * Bytes outside the target lane are filled with the		\
 * operation's identity element (all ones for AND, zeroes	\
 * for OR/XOR) so they pass through unchanged.			\
 */								\
val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
put_##N(&val32, mem, val);					\
								\
do_sync();							\
__asm volatile ( \
	"1:"							\
	"\tldrex %0, %5\n"	/* Load old value. */		\
	"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
	"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
	"\tcmp   %3, #0\n"	/* Did it succeed? */		\
	"\tbne   1b\n"		/* Spin if failed. */		\
: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
"=&r" (temp2) \
: "r" (val32.v32), "m" (*mem32)); \
return (get_##N(&old, mem)); \
}
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
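
/*
 * Worked example: __sync_fetch_and_or_1_c() on the byte in lane 0 with
 * val 0x01 builds val32.v32 == 0x00000001 (idempotence == 0), so the
 * "orr" leaves lanes 1-3 untouched; __sync_fetch_and_and_1_c() starts
 * from 0xffffffff (idempotence == 1) so the "and" preserves them.
 */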
/*
 * 32-bit operations: these map directly onto a ldrex/strex loop.
 */
uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}
uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}
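
/*
 * Illustrative sketch (compiled out): a classic consumer of the
 * primitives above is a test-and-set spinlock; this hypothetical
 * helper is not part of this file's API.
 */
#if 0
static void
example_spin_lock(uint32_t *lock)
{

	while (__sync_lock_test_and_set_4_c(lock, 1) != 0)
		;	/* lock was already held; try again */
}
#endif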
#define EMIT_FETCH_AND_OP_4(name, op) \
uint32_t \
__sync_##name##_4##_c(uint32_t *mem, uint32_t val) \
{ \
uint32_t old, temp1, temp2; \
\
do_sync(); \
__asm volatile ( \
"1:" \
"\tldrex %0, %5\n" \
"\t"op" %2, %0, %4\n" \
"\tstrex %3, %2, %1\n" \
"\tcmp %3, #0\n" \
"\tbne 1b\n" \
: "=&r" (old), "=m" (*mem), "=&r" (temp1), \
"=&r" (temp2) \
: "r" (val), "m" (*mem)); \
return (old); \
}
EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
#ifndef __clang__
/* With GCC, alias the _c implementations to the canonical names. */
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif
#else /* ARMv5 and older */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor kernels, disable interrupts around the operation,
 * as in the __atomic_* implementations above.
 */
#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
uintN_t \
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \
uintN_t desired) \
{ \
uintN_t ret; \
\
WITHOUT_INTERRUPTS({ \
ret = *mem; \
if (*mem == expected) \
*mem = desired; \
}); \
return (ret); \
}
#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op) \
uintN_t \
__sync_##name##_##N(uintN_t *mem, uintN_t val) \
{ \
uintN_t ret; \
\
WITHOUT_INTERRUPTS({ \
ret = *mem; \
*mem op val; \
}); \
return (ret); \
}
#define EMIT_ALL_OPS_N(N, uintN_t) \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)
EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N
#else /* !_KERNEL */

/*
 * For userspace, use Restartable Atomic Sequences, as above.
 */
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \
uintN_t \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \
{ \
uint32_t old, temp, ras_start; \
\
ras_start = ARM_RAS_START; \
__asm volatile ( \
	/* Set up Restartable Atomic Sequence. */		\
	"1:"							\
	"\tadr   %2, 1b\n"					\
	"\tstr   %2, [%5]\n"					\
	"\tadr   %2, 2f\n"					\
	"\tstr   %2, [%5, #4]\n"				\
								\
	"\t"ldr" %0, %4\n"	/* Load old value. */		\
	"\t"str" %3, %1\n"	/* Store new value. */		\
								\
	/* Tear down Restartable Atomic Sequence. */		\
	"2:"							\
	"\tmov   %2, #0x00000000\n"				\
	"\tstr   %2, [%5]\n"					\
	"\tmov   %2, #0xffffffff\n"				\
	"\tstr   %2, [%5, #4]\n"				\
: "=&r" (old), "=m" (*mem), "=&r" (temp) \
: "r" (val), "m" (*mem), "r" (ras_start)); \
return (old); \
}
#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \
uintN_t \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \
uintN_t desired) \
{ \
uint32_t old, temp, ras_start; \
\
ras_start = ARM_RAS_START; \
__asm volatile ( \
	/* Set up Restartable Atomic Sequence. */		\
	"1:"							\
	"\tadr   %2, 1b\n"					\
	"\tstr   %2, [%6]\n"					\
	"\tadr   %2, 2f\n"					\
	"\tstr   %2, [%6, #4]\n"				\
								\
	"\t"ldr" %0, %5\n"	/* Load old value. */		\
	"\tcmp   %0, %3\n"	/* Compare to expected value. */\
	"\t"streq" %4, %1\n"	/* Store new value if equal. */	\
								\
	/* Tear down Restartable Atomic Sequence. */		\
	"2:"							\
	"\tmov   %2, #0x00000000\n"				\
	"\tstr   %2, [%6]\n"					\
	"\tmov   %2, #0xffffffff\n"				\
	"\tstr   %2, [%6, #4]\n"				\
: "=&r" (old), "=m" (*mem), "=&r" (temp) \
: "r" (expected), "r" (desired), "m" (*mem), \
"r" (ras_start)); \
return (old); \
}
#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
uint32_t old, temp, ras_start; \
\
ras_start = ARM_RAS_START; \
__asm volatile ( \
	/* Set up Restartable Atomic Sequence. */		\
	"1:"							\
	"\tadr   %2, 1b\n"					\
	"\tstr   %2, [%5]\n"					\
	"\tadr   %2, 2f\n"					\
	"\tstr   %2, [%5, #4]\n"				\
								\
	"\t"ldr" %0, %4\n"	/* Load old value. */		\
	"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
	"\t"str" %2, %1\n"	/* Store new value. */		\
								\
	/* Tear down Restartable Atomic Sequence. */		\
	"2:"							\
	"\tmov   %2, #0x00000000\n"				\
	"\tstr   %2, [%5]\n"					\
	"\tmov   %2, #0xffffffff\n"				\
	"\tstr   %2, [%5, #4]\n"				\
: "=&r" (old), "=m" (*mem), "=&r" (temp) \
: "r" (val), "m" (*mem), "r" (ras_start)); \
return (old); \
}
#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")
/* Use the UAL "strbeq"/"strheq" spellings (not pre-UAL "streqb"/"streqh"),
 * matching the __atomic_* section above; Clang's assembler accepts only UAL. */
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#ifndef __clang__
/* With GCC, alias the _c implementations to the canonical names. */
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif
#endif /* _KERNEL */

#endif /* ARMv6+ */

#endif /* __SYNC_ATOMICS || EMIT_SYNC_ATOMICS */