From c69538936623d2a41f71c88b50e5ce6c8ba2d5e1 Mon Sep 17 00:00:00 2001
From: hboehm
Date: Tue, 11 Nov 2008 00:27:48 +0000
Subject: [PATCH] 2008-11-10 Hans Boehm (Really Joerg Wagner) *
 src/atomic_ops/sysdeps/armcc/arm_v6.h: Compute AO_compare_and_swap value
 differently, add AO_compare_double_and_swap_double, some indentation fixes. *
 src/atomic_ops/sysdeps/gcc/arm.h: Make gcc asm code more robust and minimize
 clobbers, Add AO_compare_double_and_swap_double.

---
Review notes (this section is ignored by git-am):
  * Layout: one diff line per text line; hunk line counts match the @@
    headers and the diffstat below.  Indentation inside the hunks is
    reconstructed, so apply with "git apply --ignore-whitespace".
  * AO_compare_double_and_swap_double (both headers) now returns 0/1
    instead of false/true, which are undeclared in C without <stdbool.h>.
  * gcc: ldrexd/strexd name both registers of the 64-bit pair (%N, %HN)
    instead of relying on the assembler's ARM-mode shorthand.
  * Comment fixes in added lines only; context and removed lines are
    left as they are so that they still match the pre-image.
  * Because added lines changed, the "index" post-image hashes below are
    informational only.

 ChangeLog                             |   7 ++
 src/atomic_ops/sysdeps/armcc/arm_v6.h |  59 +++++++++---
 src/atomic_ops/sysdeps/gcc/arm.h      | 125 ++++++++++++++++----------
 3 files changed, 132 insertions(+), 59 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6914709..3f6aa7c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-11-10 Hans Boehm (Really Joerg Wagner)
+	* src/atomic_ops/sysdeps/armcc/arm_v6.h: Compute
+	AO_compare_and_swap value differently, add
+	AO_compare_double_and_swap_double, some indentation fixes.
+	* src/atomic_ops/sysdeps/gcc/arm.h: Make gcc asm code more
+	robust and minimize clobbers, Add AO_compare_double_and_swap_double.
+
 2008-11-06 Hans Boehm
 	* INSTALL: Add some platform-specific documentation.
 	* src/Makefile.msft: Fix copyright notice.
diff --git a/src/atomic_ops/sysdeps/armcc/arm_v6.h b/src/atomic_ops/sysdeps/armcc/arm_v6.h
index 326506c..469b994 100644
--- a/src/atomic_ops/sysdeps/armcc/arm_v6.h
+++ b/src/atomic_ops/sysdeps/armcc/arm_v6.h
@@ -24,15 +24,17 @@
 
 #if __TARGET_ARCH_ARM < 6
 Dont use with ARM instruction sets lower than v6
-#endif
+#else
+
+#include "../standard_ao_double_t.h"
 
 /* NEC LE-IT: ARMv6 is the first architecture providing support for simple LL/SC
  * A data memory barrier must be raised via CP15 command (see documentation).
- * 
+ *
  * ARMv7 is compatible to ARMv6 but has a simpler command for issuing a
  * memory barrier (DMB). Raising it via CP15 should still work as told me by the
  * support engineers. If it turns out to be much quicker than we should implement
- * custom code for ARMv7 using the asm { dmb } command. 
+ * custom code for ARMv7 using the asm { dmb } command.
  *
  * If only a single processor is used, we can define AO_UNIPROCESSOR
  * and do not need to access CP15 for ensuring a DMB at all.
@@ -41,12 +43,12 @@ Dont use with ARM instruction sets lower than v6
 AO_INLINE void
 AO_nop_full()
 {
-# ifndef AO_UNIPROCESSOR
-    unsigned int dest=0;
-    /* issue an data memory barrier (keeps ordering of memory transactions */
-    /* before and after this operation) */
+#ifndef AO_UNIPROCESSOR
+	unsigned int dest=0;
+	/* issue a data memory barrier (keeps ordering of memory transactions	*/
+	/* before and after this operation)					*/
 	__asm { mcr p15,0,dest,c7,c10,5 } ;
-# endif
+#endif
 }
 
 #define AO_HAVE_nop_full
@@ -54,8 +56,8 @@ AO_nop_full()
 AO_INLINE AO_t
 AO_load(const volatile AO_t *addr)
 {
-  /* Cast away the volatile in case it adds fence semantics.		*/
-  return (*(const AO_t *)addr);
+	/* Cast away the volatile in case it adds fence semantics */
+	return (*(const AO_t *)addr);
 }
 #define AO_HAVE_load
 
@@ -184,16 +186,49 @@ AO_compare_and_swap(volatile AO_t *addr,
 
 retry:
 __asm__ {
-	ldrex	tmp, [addr]
 	mov		result, #2
+	ldrex	tmp, [addr]
 	teq		tmp, old_val
 	strexeq	result, new_val, [addr]
 	teq		result, #1
 	beq		retry
 	}
-	return (result^2)>>1;
+	return !(result&2);
 }
 
 #define AO_HAVE_compare_and_swap
 
+/* helper functions for the Realview compiler: LDREXD is not usable
+ * with inline assembler, so use the "embedded" assembler as
+ * suggested by ARM Dev. support (June 2008). */
+__asm inline double_ptr_storage load_ex(volatile AO_double_t *addr) {
+	LDREXD r0,r1,[r0]
+}
+
+__asm inline int store_ex(AO_t val1, AO_t val2, volatile AO_double_t *addr) {
+	STREXD r3,r0,r1,[r2]
+	MOV    r0,r3
+}
+
+AO_INLINE int
+AO_compare_double_and_swap_double(volatile AO_double_t *addr,
+				  AO_t old_val1, AO_t old_val2,
+				  AO_t new_val1, AO_t new_val2)
+{
+	double_ptr_storage old_val = ((double_ptr_storage)old_val2 << 32) | old_val1;
+
+	double_ptr_storage tmp;
+	int result;
+
+	while(1) {
+		tmp = load_ex(addr);
+		if(tmp != old_val)	return 0;	/* mismatch: no store, report failure */
+		result = store_ex(new_val1, new_val2, addr);
+		if(!result)	return 1;	/* store_ex returned 0: swap done */
+	}
+}
+
+#define AO_HAVE_compare_double_and_swap_double
+
+
 #endif // __TARGET_ARCH_ARM
diff --git a/src/atomic_ops/sysdeps/gcc/arm.h b/src/atomic_ops/sysdeps/gcc/arm.h
index 5b57048..b9d5d9e 100644
--- a/src/atomic_ops/sysdeps/gcc/arm.h
+++ b/src/atomic_ops/sysdeps/gcc/arm.h
@@ -34,6 +34,9 @@
 /* NEC LE-IT: gcc has no way to easily check the arm architecture
  * but defines only one of __ARM_ARCH_x__ to be true			*/
 #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_7__)
+
+#include "../standard_ao_double_t.h"
+
 AO_INLINE void
 AO_nop_full()
 {
@@ -75,7 +78,7 @@ AO_load(const volatile AO_t *addr)
 
    STR(x)   	STREX(x)	  Yes
    -----------------------------------
-   
+
  * ARMv7 behaves similar, see documentation CortexA8 TRM, point 8.5
  *
  * HB: I think this is only a problem if interrupt handlers do not clear
@@ -84,16 +87,16 @@ AO_load(const volatile AO_t *addr)
 */
 AO_INLINE void AO_store(volatile AO_t *addr, AO_t value)
 {
-	unsigned long tmp;
-	
+	AO_t	flag;
+
 	__asm__ __volatile__("@AO_store\n"
-"1:	ldrex	%0, [%1]\n"
-"	strex	%0, %2, [%1]\n"
+"1:	ldrex	%0, [%2]\n"
+"	strex 	%0, %3, [%2]\n"
 "	teq	%0, #0\n"
 "	bne 	1b"
-	: "=&r"(tmp)
+	: "=&r"(flag), "+m"(*addr)
 	: "r" (addr), "r"(value)
-	: "cc","memory");
+	: "cc");
 }
 
 #define AO_HAVE_store
@@ -113,16 +116,16 @@ AO_INLINE AO_TS_t
 AO_test_and_set(volatile AO_TS_t *addr) {
 
 	AO_TS_t oldval;
-	unsigned long tmp;
+	unsigned long flag;
 
 	__asm__ __volatile__("@AO_test_and_set\n"
-"1:	ldrex	%0, [%2]\n"
-"	strex	%1, %3, [%2]\n"
-"	teq	%1, #0\n"
-"	bne	1b\n"
-	: "=&r"(oldval),"=&r"(tmp)
+"1:      ldrex   %0, [%3]\n"
+"        strex   %1, %4, [%3]\n"
+"        teq     %1, #0\n"
+"        bne     1b\n"
+	: "=&r"(oldval),"=&r"(flag), "+m"(*addr)
 	: "r"(addr), "r"(1)
-	: "memory","cc");
+	: "cc");
 
 	return oldval;
 }
@@ -133,18 +136,18 @@ AO_test_and_set(volatile AO_TS_t *addr) {
 AO_INLINE AO_t
 AO_fetch_and_add(volatile AO_t *p, AO_t incr)
 {
-	unsigned long tmp,tmp2;
+	unsigned long flag,tmp;
 	AO_t result;
 
 	__asm__ __volatile__("@AO_fetch_and_add\n"
-"1:	ldrex	%0, [%4]\n"			/* get original			  */
-"	add	%2, %3, %0\n"		/* sum up */
-"	strex	%1, %2, [%4]\n"		/* store them */
-"	teq	%1, #0\n"
-"	bne	1b\n"
-	: "=&r"(result),"=&r"(tmp),"=&r"(tmp2)
-	: "r"(incr), "r"(p)
-	: "cc","memory");
+"1:      ldrex   %0, [%5]\n"         /* get original         */
+"        add     %2, %0, %4\n"       /* sum up into tmp      */
+"        strex   %1, %2, [%5]\n"     /* store them           */
+"        teq     %1, #0\n"
+"        bne     1b\n"
+	: "=&r"(result),"=&r"(flag),"=&r"(tmp),"+m"(*p)	/* 0..3 */
+	: "r"(incr), "r"(p)				/* 4..5 */
+	: "cc");
 
 	return result;
 }
@@ -155,18 +158,18 @@ AO_fetch_and_add(volatile AO_t *p, AO_t incr)
 AO_INLINE AO_t
 AO_fetch_and_add1(volatile AO_t *p)
 {
-	unsigned long tmp,tmp2;
+	unsigned long flag,tmp;
 	AO_t result;
 
 	__asm__ __volatile__("@AO_fetch_and_add1\n"
-"1:	ldrex	%0, [%3]\n"			/* get original   */
+"1:      ldrex   %0, [%4]\n"         /* get original */
 "	add	%1, %0, #1\n"		/* increment */
-"	strex	%2, %1, [%3]\n"		/* store them */
-"	teq	%2, #0\n"
-"	bne	1b\n"
-	: "=&r"(result), "=&r"(tmp), "=&r"(tmp2)
+"        strex   %2, %1, [%4]\n"     /* store them */
+"        teq     %2, #0\n"
+"        bne     1b\n"
+	: "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p)
 	: "r"(p)
-	: "cc","memory");
+	: "cc");
 
 	return result;
 }
@@ -177,18 +180,18 @@ AO_fetch_and_add1(volatile AO_t *p)
 AO_INLINE AO_t
 AO_fetch_and_sub1(volatile AO_t *p)
 {
-	unsigned long tmp,tmp2;
+	unsigned long flag,tmp;
 	AO_t result;
 
-	__asm__ __volatile__("@ AO_fetch_and_sub1\n"
-"1:	ldrex	%0, [%3]\n"			/* get original   */
-"	sub	%1, %0, #1\n"		/* increment */
-"	strex	%2, %1, [%3]\n"		/* store them */
-"	teq	%2, #0\n"
-"	bne	1b\n"
-	: "=&r"(result), "=&r"(tmp), "=&r"(tmp2)
+	__asm__ __volatile__("@AO_fetch_and_sub1\n"
+"1:      ldrex   %0, [%4]\n"         /* get original */
+"        sub     %1, %0, #1\n"       /* decrement */
+"        strex   %2, %1, [%4]\n"     /* store them */
+"        teq     %2, #0\n"
+"        bne     1b\n"
+	: "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p)
 	: "r"(p)
-	: "cc","memory");
+	: "cc");
 
 	return result;
 }
@@ -204,21 +207,49 @@ AO_compare_and_swap(volatile AO_t *addr,
 	AO_t result,tmp;
 
 	__asm__ __volatile__("@ AO_compare_and_swap\n"
-"1:	ldrex	%1, [%2]\n"			/* get original	*/
-"	mov	%0, #2\n"			/* store a flag */
-"	teq	%1, %3\n"			/* see if match */
-"	strexeq	%0, %4, [%2]\n"		/* store new one if matched */
+"1:      mov     %0, #2\n"           /* store a flag */
+"        ldrex   %1, [%3]\n"         /* get original */
+"        teq     %1, %4\n"           /* see if match */
+"        strexeq %0, %5, [%3]\n"     /* store new one if matched */
 "	teq	%0, #1\n"
 "	beq	1b\n"				/* if update failed, repeat */
-"	eor	%0, %0, #2\n"		/* if succeded, return 2, else 0 */
-	: "=&r"(result), "=&r"(tmp)
+	: "=&r"(result), "=&r"(tmp), "+m"(*addr)
 	: "r"(addr), "r"(old_val), "r"(new_val)
-	: "cc","memory");
+	: "cc");
 
-	return (result>>1);
+	return !(result&2);	/* if succeeded, return 1, else 0 */
 }
 
 #define AO_HAVE_compare_and_swap
+AO_INLINE int
+AO_compare_double_and_swap_double(volatile AO_double_t *addr,
+				  AO_t old_val1, AO_t old_val2,
+				  AO_t new_val1, AO_t new_val2)
+{
+	double_ptr_storage old_val = ((double_ptr_storage)old_val2 << 32) | old_val1;
+	double_ptr_storage new_val = ((double_ptr_storage)new_val2 << 32) | new_val1;
+
+	double_ptr_storage tmp;
+	int result;
+
+	while(1) {
+		__asm__ __volatile__("@ AO_compare_and_swap_double\n"
+		"	ldrexd	%0, %H0, [%1]\n"	/* load 64-bit original into pair %0/%H0 */
+			: "=&r"(tmp)
+			: "r"(addr)
+			: );
+		if(tmp != old_val)	return 0;	/* mismatch: no store, report failure */
+		__asm__ __volatile__(
+		"	strexd	%0, %2, %H2, [%3]\n"	/* store new one if matched */
+			: "=&r"(result),"+m"(*addr)
+			: "r"(new_val), "r"(addr)
+			: );
+		if(!result)	return 1;	/* strexd status 0: swap done */
+	}
+}
+
+#define AO_HAVE_compare_double_and_swap_double
+
 
 #else
 /* pre ARMv6 architecures ... */
 
-- 
2.40.0