From: Ivan Maidanski
Date: Mon, 11 Apr 2016 19:20:07 +0000 (+0300)
Subject: Use GCC atomic intrinsics for ARM (gcc 4.8+ and clang 3.5+)
X-Git-Tag: v7.6.0~221
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3c52619c1796c8f6ecd43fa6c9d481cc4c026788;p=libatomic_ops

Use GCC atomic intrinsics for ARM (gcc 4.8+ and clang 3.5+)

* src/atomic_ops/sysdeps/gcc/arm.h (AO_GCC_ATOMIC_TEST_AND_SET): Define if gcc 4.8+ or clang 3.5+ or AO_PREFER_BUILTIN_ATOMICS unless AO_DISABLE_GCC_ATOMICS.
* src/atomic_ops/sysdeps/gcc/arm.h (AO_nop_write, AO_store, AO_char_store, AO_short_store): Do not define (in assembly code) if AO_PREFER_BUILTIN_ATOMICS.
* src/atomic_ops/sysdeps/gcc/arm.h (AO_SKIPATOMIC_store, AO_SKIPATOMIC_store_release, AO_SKIPATOMIC_char_store, AO_SKIPATOMIC_char_store_release, AO_SKIPATOMIC_short_store, AO_SKIPATOMIC_short_store_release, AO_SKIPATOMIC_int_store, AO_SKIPATOMIC_int_store_release): Define if AO_BROKEN_TASKSWITCH_CLREX.
* src/atomic_ops/sysdeps/gcc/arm.h (AO_nop_full, AO_test_and_set, AO_fetch_and_add, AO_fetch_and_add1, AO_fetch_and_sub1, AO_and, AO_or, AO_xor, AO_char_fetch_and_add, AO_short_fetch_and_add, AO_compare_and_swap, AO_fetch_compare_and_swap, AO_double_load, AO_double_store, AO_double_compare_and_swap, AO_test_and_set_full): Do not define (in assembly code) if AO_GCC_ATOMIC_TEST_AND_SET.
* src/atomic_ops/sysdeps/gcc/arm.h: Do not include atomic_store.h, all_atomic_only_load.h, char_atomic_store.h, short_atomic_store.h, all_aligned_atomic_load_store.h if AO_GCC_ATOMIC_TEST_AND_SET.
* src/atomic_ops/sysdeps/gcc/arm.h: Include generic.h (at the end of the file) if AO_GCC_ATOMIC_TEST_AND_SET.
* src/atomic_ops/sysdeps/gcc/generic-small.h: Regenerate.
* src/atomic_ops/sysdeps/gcc/generic-small.template (AO_XSIZE_store): Do not define if AO_SKIPATOMIC_XSIZE_store.
* src/atomic_ops/sysdeps/gcc/generic-small.template (AO_XSIZE_store_release): Do not define if AO_SKIPATOMIC_XSIZE_store_release.
--- diff --git a/src/atomic_ops/sysdeps/gcc/arm.h b/src/atomic_ops/sysdeps/gcc/arm.h index ae4dced..5141d93 100644 --- a/src/atomic_ops/sysdeps/gcc/arm.h +++ b/src/atomic_ops/sysdeps/gcc/arm.h @@ -15,6 +15,16 @@ * */ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) \ + || __clang_major__ > 3 \ + || (__clang_major__ == 3 && __clang_minor__ >= 5) \ + || (defined(AO_PREFER_BUILTIN_ATOMICS) \ + && __GNUC__ == 4 && __GNUC_MINOR__ >= 2)) \ + && !defined(AO_DISABLE_GCC_ATOMICS) + /* Probably, it could be enabled even for earlier gcc/clang versions. */ +# define AO_GCC_ATOMIC_TEST_AND_SET +#endif + #include "../test_and_set_t_is_ao_t.h" /* Probably suboptimal */ #ifdef __native_client__ @@ -95,6 +105,28 @@ /* Also, SWP is obsoleted for ARMv8+. */ #endif /* !__thumb2__ */ +#if !defined(AO_UNIPROCESSOR) && defined(AO_ARM_HAVE_DMB) \ + && !defined(AO_PREFER_BUILTIN_ATOMICS) + AO_INLINE void + AO_nop_write(void) + { + /* AO_THUMB_GO_ARM is empty. */ + /* This will target the system domain and thus be overly */ + /* conservative as the CPUs will occupy the inner shareable domain. */ + /* The plain variant (dmb st) is theoretically slower, and should */ + /* not be needed. That said, with limited experimentation, a CPU */ + /* implementation for which it actually matters has not been found */ + /* yet, though they should already exist. */ + /* Anyway, note that the "st" and "ishst" barriers are actually */ + /* quite weak and, as the libatomic_ops documentation states, */ + /* usually not what you really want. */ + __asm__ __volatile__("dmb ishst" : : : "memory"); + } +# define AO_HAVE_nop_write +#endif /* AO_ARM_HAVE_DMB */ + +#ifndef AO_GCC_ATOMIC_TEST_AND_SET + #ifdef AO_UNIPROCESSOR /* If only a single processor (core) is used, AO_UNIPROCESSOR could */ /* be defined by the client to avoid unnecessary memory barrier. */ @@ -118,23 +150,6 @@ } # define AO_HAVE_nop_full - AO_INLINE void - AO_nop_write(void) - { - /* AO_THUMB_GO_ARM is empty. 
*/ - /* This will target the system domain and thus be overly */ - /* conservative as the CPUs will occupy the inner shareable domain. */ - /* The plain variant (dmb st) is theoretically slower, and should */ - /* not be needed. That said, with limited experimentation, a CPU */ - /* implementation for which it actually matters has not been found */ - /* yet, though they should already exist. */ - /* Anyway, note that the "st" and "ishst" barriers are actually */ - /* quite weak and, as the libatomic_ops documentation states, */ - /* usually not what you really want. */ - __asm__ __volatile__("dmb ishst" : : : "memory"); - } -# define AO_HAVE_nop_write - #elif defined(AO_ARM_HAVE_LDREX) /* ARMv6 is the first architecture providing support for a simple */ /* LL/SC. A data memory barrier must be raised via CP15 command. */ @@ -159,14 +174,9 @@ /* AO_nop_full() is emulated using AO_test_and_set_full(). */ #endif /* !AO_UNIPROCESSOR && !AO_ARM_HAVE_LDREX */ -#ifdef AO_ARM_HAVE_LDREX +#endif /* !AO_GCC_ATOMIC_TEST_AND_SET */ - /* AO_t/char/short/int load is simple reading. */ - /* Unaligned accesses are not guaranteed to be atomic. */ -# define AO_ACCESS_CHECK_ALIGNED -# define AO_ACCESS_short_CHECK_ALIGNED -# define AO_ACCESS_int_CHECK_ALIGNED -# include "../all_atomic_only_load.h" +#ifdef AO_ARM_HAVE_LDREX /* "ARM Architecture Reference Manual" (chapter A3.5.3) says that the */ /* single-copy atomic processor accesses are all byte accesses, all */ @@ -184,6 +194,18 @@ /* arch/arm/kernel/entry-header.S of Linux. Nonetheless, there is */ /* a doubt this was properly implemented in some ancient OS releases. 
*/ # ifdef AO_BROKEN_TASKSWITCH_CLREX + +# define AO_SKIPATOMIC_store +# define AO_SKIPATOMIC_store_release +# define AO_SKIPATOMIC_char_store +# define AO_SKIPATOMIC_char_store_release +# define AO_SKIPATOMIC_short_store +# define AO_SKIPATOMIC_short_store_release +# define AO_SKIPATOMIC_int_store +# define AO_SKIPATOMIC_int_store_release + +# ifndef AO_PREFER_BUILTIN_ATOMICS + AO_INLINE void AO_store(volatile AO_t *addr, AO_t value) { int flag; @@ -248,11 +270,26 @@ # define AO_HAVE_short_store # endif /* AO_ARM_HAVE_LDREXBH */ -# else +# endif /* !AO_PREFER_BUILTIN_ATOMICS */ + +# elif !defined(AO_GCC_ATOMIC_TEST_AND_SET) # include "../loadstore/atomic_store.h" /* AO_int_store is defined in ao_t_is_int.h. */ # endif /* !AO_BROKEN_TASKSWITCH_CLREX */ +#endif /* AO_ARM_HAVE_LDREX */ + +#ifndef AO_GCC_ATOMIC_TEST_AND_SET + +#ifdef AO_ARM_HAVE_LDREX + + /* AO_t/char/short/int load is simple reading. */ + /* Unaligned accesses are not guaranteed to be atomic. */ +# define AO_ACCESS_CHECK_ALIGNED +# define AO_ACCESS_short_CHECK_ALIGNED +# define AO_ACCESS_int_CHECK_ALIGNED +# include "../all_atomic_only_load.h" + # ifndef AO_HAVE_char_store # include "../loadstore/char_atomic_store.h" # include "../loadstore/short_atomic_store.h" @@ -666,4 +703,13 @@ AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val) # define AO_HAVE_test_and_set_full #endif /* !AO_HAVE_test_and_set[_full] && AO_ARM_HAVE_SWP */ +#else /* AO_GCC_ATOMIC_TEST_AND_SET */ + +# ifdef AO_ARM_HAVE_LDREXD +# include "../standard_ao_double_t.h" +# endif +# include "generic.h" + +#endif /* AO_GCC_ATOMIC_TEST_AND_SET */ + #define AO_T_IS_INT diff --git a/src/atomic_ops/sysdeps/gcc/generic-small.h b/src/atomic_ops/sysdeps/gcc/generic-small.h index ad34568..ef91e5a 100644 --- a/src/atomic_ops/sysdeps/gcc/generic-small.h +++ b/src/atomic_ops/sysdeps/gcc/generic-small.h @@ -33,19 +33,23 @@ AO_char_load_acquire(const volatile unsigned/**/char *addr) /* char_load_read is defined using load and 
nop_read. */ /* char_store_full definition is omitted similar to load_full reason. */ -AO_INLINE void -AO_char_store(volatile unsigned/**/char *addr, unsigned/**/char value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELAXED); -} -#define AO_HAVE_char_store +#ifndef AO_SKIPATOMIC_char_store + AO_INLINE void + AO_char_store(volatile unsigned/**/char *addr, unsigned/**/char value) + { + __atomic_store_n(addr, value, __ATOMIC_RELAXED); + } +# define AO_HAVE_char_store +#endif -AO_INLINE void -AO_char_store_release(volatile unsigned/**/char *addr, unsigned/**/char value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELEASE); -} -#define AO_HAVE_char_store_release +#ifndef AO_SKIPATOMIC_char_store_release + AO_INLINE void + AO_char_store_release(volatile unsigned/**/char *addr, unsigned/**/char value) + { + __atomic_store_n(addr, value, __ATOMIC_RELEASE); + } +# define AO_HAVE_char_store_release +#endif #ifdef AO_GCC_HAVE_char_SYNC_CAS @@ -172,19 +176,23 @@ AO_short_load_acquire(const volatile unsigned/**/short *addr) /* short_load_read is defined using load and nop_read. */ /* short_store_full definition is omitted similar to load_full reason. 
*/ -AO_INLINE void -AO_short_store(volatile unsigned/**/short *addr, unsigned/**/short value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELAXED); -} -#define AO_HAVE_short_store +#ifndef AO_SKIPATOMIC_short_store + AO_INLINE void + AO_short_store(volatile unsigned/**/short *addr, unsigned/**/short value) + { + __atomic_store_n(addr, value, __ATOMIC_RELAXED); + } +# define AO_HAVE_short_store +#endif -AO_INLINE void -AO_short_store_release(volatile unsigned/**/short *addr, unsigned/**/short value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELEASE); -} -#define AO_HAVE_short_store_release +#ifndef AO_SKIPATOMIC_short_store_release + AO_INLINE void + AO_short_store_release(volatile unsigned/**/short *addr, unsigned/**/short value) + { + __atomic_store_n(addr, value, __ATOMIC_RELEASE); + } +# define AO_HAVE_short_store_release +#endif #ifdef AO_GCC_HAVE_short_SYNC_CAS @@ -311,19 +319,23 @@ AO_int_load_acquire(const volatile unsigned *addr) /* int_load_read is defined using load and nop_read. */ /* int_store_full definition is omitted similar to load_full reason. */ -AO_INLINE void -AO_int_store(volatile unsigned *addr, unsigned value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELAXED); -} -#define AO_HAVE_int_store +#ifndef AO_SKIPATOMIC_int_store + AO_INLINE void + AO_int_store(volatile unsigned *addr, unsigned value) + { + __atomic_store_n(addr, value, __ATOMIC_RELAXED); + } +# define AO_HAVE_int_store +#endif -AO_INLINE void -AO_int_store_release(volatile unsigned *addr, unsigned value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELEASE); -} -#define AO_HAVE_int_store_release +#ifndef AO_SKIPATOMIC_int_store_release + AO_INLINE void + AO_int_store_release(volatile unsigned *addr, unsigned value) + { + __atomic_store_n(addr, value, __ATOMIC_RELEASE); + } +# define AO_HAVE_int_store_release +#endif #ifdef AO_GCC_HAVE_int_SYNC_CAS @@ -450,19 +462,23 @@ AO_load_acquire(const volatile AO_t *addr) /* load_read is defined using load and nop_read. 
*/ /* store_full definition is omitted similar to load_full reason. */ -AO_INLINE void -AO_store(volatile AO_t *addr, AO_t value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELAXED); -} -#define AO_HAVE_store +#ifndef AO_SKIPATOMIC_store + AO_INLINE void + AO_store(volatile AO_t *addr, AO_t value) + { + __atomic_store_n(addr, value, __ATOMIC_RELAXED); + } +# define AO_HAVE_store +#endif -AO_INLINE void -AO_store_release(volatile AO_t *addr, AO_t value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELEASE); -} -#define AO_HAVE_store_release +#ifndef AO_SKIPATOMIC_store_release + AO_INLINE void + AO_store_release(volatile AO_t *addr, AO_t value) + { + __atomic_store_n(addr, value, __ATOMIC_RELEASE); + } +# define AO_HAVE_store_release +#endif #ifdef AO_GCC_HAVE_SYNC_CAS diff --git a/src/atomic_ops/sysdeps/gcc/generic-small.template b/src/atomic_ops/sysdeps/gcc/generic-small.template index dd17d25..00296b6 100644 --- a/src/atomic_ops/sysdeps/gcc/generic-small.template +++ b/src/atomic_ops/sysdeps/gcc/generic-small.template @@ -33,19 +33,23 @@ AO_XSIZE_load_acquire(const volatile XCTYPE *addr) /* XSIZE_load_read is defined using load and nop_read. */ /* XSIZE_store_full definition is omitted similar to load_full reason. 
*/ -AO_INLINE void -AO_XSIZE_store(volatile XCTYPE *addr, XCTYPE value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELAXED); -} -#define AO_HAVE_XSIZE_store +#ifndef AO_SKIPATOMIC_XSIZE_store + AO_INLINE void + AO_XSIZE_store(volatile XCTYPE *addr, XCTYPE value) + { + __atomic_store_n(addr, value, __ATOMIC_RELAXED); + } +# define AO_HAVE_XSIZE_store +#endif -AO_INLINE void -AO_XSIZE_store_release(volatile XCTYPE *addr, XCTYPE value) -{ - __atomic_store_n(addr, value, __ATOMIC_RELEASE); -} -#define AO_HAVE_XSIZE_store_release +#ifndef AO_SKIPATOMIC_XSIZE_store_release + AO_INLINE void + AO_XSIZE_store_release(volatile XCTYPE *addr, XCTYPE value) + { + __atomic_store_n(addr, value, __ATOMIC_RELEASE); + } +# define AO_HAVE_XSIZE_store_release +#endif #ifdef AO_GCC_HAVE_XSIZE_SYNC_CAS