From: ivmai
Date: Thu, 10 Sep 2009 14:12:12 +0000 (+0000)
Subject: diff108_cvs
X-Git-Tag: libatomic_ops-7_2alpha4~20
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2b0b217b51d2dfb9b68fbe8481da8cd107af187e;p=libatomic_ops

diff108_cvs
---

diff --git a/ChangeLog b/ChangeLog
index 561a631..0ec61b5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,46 @@
+2009-08-06 Ivan Maidanski
+ * src/atomic_ops/sysdeps/gcc/x86_64.h: Remove comments about i486
+ and 32-bit WinChips.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Ditto.
+ * src/atomic_ops/sysdeps/gcc/x86_64.h (AO_nop_full): Replace
+ K&R-style function definition with ANSI C one.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_nop_full): Ditto.
+ * src/atomic_ops/sysdeps/gcc/x86_64.h
+ (AO_compare_double_and_swap_double_full): Fix comment.
+ * src/atomic_ops/sysdeps/gcc/x86_64.h
+ (AO_compare_double_and_swap_double_full): Swap all "val1" and "val2"
+ variables ("val1" is the lowest part of AO_double_t).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h
+ (AO_compare_double_and_swap_double_full): Ditto.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Remove comment about
+ ASSUME_WINDOWS98.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_ASM_X64_AVAILABLE): New
+ macro.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Include
+ "test_and_set_t_is_char.h" if AO_ASM_X64_AVAILABLE (same as in
+ x86_64.h for gcc); remove FIXME (for re-implement test-and-set).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Include
+ "standard_ao_double_t.h" (same as in x86_64.h for gcc).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Add comment for include
+ assuming at least VC++ v8.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Remove _Interlocked
+ prototypes (since they are always declared in intrin.h).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_nop_full): Move its
+ definition below CAS primitive (to textually group all asm-based
+ primitives together).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_test_and_set_full):
+ Implement for AO_ASM_X64_AVAILABLE case.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Remove AO_CASDOUBLE_MISSING
+ macro (replaced with AO_ASM_X64_AVAILABLE).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h
+ (AO_compare_double_and_swap_double_full): Add intrinsic-based
+ implementation for VC++ v9+.
+ * src/atomic_ops/sysdeps/standard_ao_double_t.h: Include
+ (and use "__m128" type) if _WIN64.
+ * src/atomic_ops/sysdeps/standard_ao_double_t.h
+ (AO_HAVE_DOUBLE_PTR_STORAGE): Define it always (as
+ "double_ptr_storage" is defined for all cases).
+
 2009-09-09 Hans Boehm (Really mostly Patrick Marlier)
 * src/atomic_ops/sysdeps/gcc/sparc.h (NO_SPARC_V9): Renamed to
 AO_NO_SPARC_V9.

diff --git a/src/atomic_ops/sysdeps/gcc/x86_64.h b/src/atomic_ops/sysdeps/gcc/x86_64.h
index f2522b4..36085c4 100644
--- a/src/atomic_ops/sysdeps/gcc/x86_64.h
+++ b/src/atomic_ops/sysdeps/gcc/x86_64.h
@@ -16,18 +16,13 @@
  * Some of the machine specific code was borrowed from our GC distribution.
  */
 
-/* The following really assume we have a 486 or better. Unfortunately */
-/* gcc doesn't define a suitable feature test macro based on command */
-/* line options. */
-/* We should perhaps test dynamically. */
-
 #include "../all_aligned_atomic_load_store.h"
 
-/* Real X86 implementations, except for some old WinChips, appear */
+/* Real X86 implementations appear */
 /* to enforce ordering between memory operations, EXCEPT that a later */
 /* read can pass earlier writes, presumably due to the visible */
 /* presence of store buffers. */
-/* We ignore both the WinChips, and the fact that the official specs */
+/* We ignore the fact that the official specs */
 /* seem to be much weaker (and arguably too weak to be usable). */
 
 #include "../ordered_except_wr.h"
@@ -38,7 +33,7 @@
 #if defined(AO_USE_PENTIUM4_INSTRS)
 
 AO_INLINE void
-AO_nop_full()
+AO_nop_full(void)
 {
   __asm__ __volatile__("mfence" : : : "memory");
 }
@@ -56,7 +51,6 @@ AO_nop_full()
 /* As far as we can tell, the lfence and sfence instructions are not */
 /* currently needed or useful for cached memory accesses. */
 
-/* Really only works for 486 and later */
 AO_INLINE AO_t
 AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
 {
@@ -109,7 +103,6 @@ AO_int_fetch_and_add_full (volatile unsigned int *p, unsigned int incr)
 
 #define AO_HAVE_int_fetch_and_add_full
 
-/* Really only works for 486 and later */
 AO_INLINE void
 AO_or_full (volatile AO_t *p, AO_t incr)
 {
@@ -148,8 +141,9 @@ AO_compare_and_swap_full(volatile AO_t *addr,
 
 #ifdef AO_CMPXCHG16B_AVAILABLE
 /* NEC LE-IT: older AMD Opterons are missing this instruction.
- * On these machines SIGILL will be thrown. Define AO_CASDOUBLE_MISSING
- * to have an emulated (lock based) version available */
+ * On these machines SIGILL will be thrown.
+ * Define AO_WEAK_DOUBLE_CAS_EMULATION to have an emulated
+ * (lock based) version available */
 /* HB: Changed this to not define either by default. There are
  * enough machines and tool chains around on which cmpxchg16b
  * doesn't work. And the emulation is unsafe by our usual rules.
@@ -164,10 +158,10 @@ AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
   __asm__ __volatile__("lock; cmpxchg16b %0; setz %1"
                        : "=m"(*addr), "=q"(result)
                        : "m"(*addr),
-                         "d" (old_val1),
-                         "a" (old_val2),
-                         "c" (new_val1),
-                         "b" (new_val2) : "memory");
+                         "d" (old_val2),
+                         "a" (old_val1),
+                         "c" (new_val2),
+                         "b" (new_val1) : "memory");
   return (int) result;
 }
 #define AO_HAVE_compare_double_and_swap_double_full

diff --git a/src/atomic_ops/sysdeps/msftc/x86_64.h b/src/atomic_ops/sysdeps/msftc/x86_64.h
index ca1a682..fb0b0ee 100644
--- a/src/atomic_ops/sysdeps/msftc/x86_64.h
+++ b/src/atomic_ops/sysdeps/msftc/x86_64.h
@@ -20,89 +20,40 @@
  * SOFTWARE.
  */
 
-/* The following really assume we have a 486 or better. */
-/* If ASSUME_WINDOWS98 is defined, we assume Windows 98 or newer. */
-
 #include "../all_aligned_atomic_load_store.h"
 
-/* Real X86 implementations, except for some old WinChips, appear */
+/* Real X86 implementations appear */
 /* to enforce ordering between memory operations, EXCEPT that a later */
 /* read can pass earlier writes, presumably due to the visible */
 /* presence of store buffers. */
-/* We ignore both the WinChips, and the fact that the official specs */
+/* We ignore the fact that the official specs */
 /* seem to be much weaker (and arguably too weak to be usable). */
 
 #include "../ordered_except_wr.h"
 
-#if 0
-FIXME: Need to reimplement testandset
-
-#include "../test_and_set_t_is_char.h"
-
+#ifdef AO_ASM_X64_AVAILABLE
+# include "../test_and_set_t_is_char.h"
 #else
-
-#include "../test_and_set_t_is_ao_t.h"
-
+# include "../test_and_set_t_is_ao_t.h"
 #endif
+#include "../standard_ao_double_t.h"
+
 #include <windows.h>
        /* Seems like over-kill, but that's what MSDN recommends. */
        /* And apparently winbase.h is not always self-contained. */
-
+/* Assume _MSC_VER >= 1400 */
 #include <intrin.h>
 
 #pragma intrinsic (_ReadWriteBarrier)
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-LONGLONG __cdecl _InterlockedIncrement64(LONGLONG volatile *Addend);
-LONGLONG __cdecl _InterlockedDecrement64(LONGLONG volatile *Addend);
-LONGLONG __cdecl _InterlockedExchangeAdd64(LONGLONG volatile* Target,
-                                           LONGLONG Addend);
-LONGLONG __cdecl _InterlockedExchange64(LONGLONG volatile* Target,
-                                        LONGLONG Value);
-LONGLONG __cdecl _InterlockedCompareExchange64(LONGLONG volatile* Dest,
-                                               LONGLONG Exchange,
-                                               LONGLONG Comp);
-
-#ifdef __cplusplus
-}
-#endif
-
 #pragma intrinsic (_InterlockedIncrement64)
 #pragma intrinsic (_InterlockedDecrement64)
 #pragma intrinsic (_InterlockedExchange64)
 #pragma intrinsic (_InterlockedExchangeAdd64)
 #pragma intrinsic (_InterlockedCompareExchange64)
 
-/* As far as we can tell, the lfence and sfence instructions are not */
-/* currently needed or useful for cached memory accesses. */
-
-/* Unfortunately mfence doesn't exist everywhere. */
-/* IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) is */
-/* probably a conservative test for it? */
-
-#if defined(AO_USE_PENTIUM4_INSTRS)
-
-AO_INLINE void
-AO_nop_full()
-{
-  __asm { mfence }
-}
-
-#define AO_HAVE_nop_full
-
-#else
-
-/* We could use the cpuid instruction. But that seems to be slower */
-/* than the default implementation based on test_and_set_full. Thus */
-/* we omit that bit of misinformation here. */
-
-#endif
-
 AO_INLINE AO_t
 AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
 {
@@ -138,49 +89,96 @@ AO_compare_and_swap_full(volatile AO_t *addr,
 
 #define AO_HAVE_compare_and_swap_full
 
-#if 0
-FIXME: (__asm not supported)
+/* As far as we can tell, the lfence and sfence instructions are not */
+/* currently needed or useful for cached memory accesses. */
+
+/* Unfortunately mfence doesn't exist everywhere. */
+/* IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) is */
+/* probably a conservative test for it? */
+
+#if defined(AO_USE_PENTIUM4_INSTRS)
+
+AO_INLINE void
+AO_nop_full(void)
+{
+  __asm { mfence }
+}
+
+#define AO_HAVE_nop_full
+
+#else
+
+/* We could use the cpuid instruction. But that seems to be slower */
+/* than the default implementation based on test_and_set_full. Thus */
+/* we omit that bit of misinformation here. */
+
+#endif
+
+#ifdef AO_ASM_X64_AVAILABLE
+
 AO_INLINE AO_TS_VAL_t
 AO_test_and_set_full(volatile AO_TS_t *addr)
 {
     __asm
     {
-        mov eax,AO_TS_SET ;
-        mov ebx,addr ;
-        xchg byte ptr [ebx],al ;
+        mov rax,AO_TS_SET ;
+        mov rbx,addr ;
+        xchg byte ptr [rbx],al ;
     }
 }
 
 #define AO_HAVE_test_and_set_full
 
-FIXME: (__asm not supported)
-NEC LE-IT: Don't have a working Win64 environment here at the moment.
-AO_compare_double_and_swap_double_full needs implementation for Win64
-But there is no _InterlockedCompareExchange128 in the WinAPI, so we
-need basically whats given below.
-Also see gcc/x86_64.h for partial old opteron workaround:
+#endif /* AO_ASM_X64_AVAILABLE */
+
+#ifdef AO_CMPXCHG16B_AVAILABLE
 
-#ifndef AO_CASDOUBLE_MISSING
+/* AO_compare_double_and_swap_double_full needs implementation for Win64.
+ * Also see ../gcc/x86_64.h for partial old Opteron workaround.
+ */
+
+# if _MSC_VER >= 1500
+
+#pragma intrinsic (_InterlockedCompareExchange128)
+
+AO_INLINE int
+AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
+                                       AO_t old_val1, AO_t old_val2,
+                                       AO_t new_val1, AO_t new_val2)
+{
+   __int64 comparandResult[2];
+   comparandResult[0] = old_val1; /* low */
+   comparandResult[1] = old_val2; /* high */
+   return _InterlockedCompareExchange128((volatile __int64 *)addr,
+                new_val2 /* high */, new_val1 /* low */, comparandResult);
+}
+
+# define AO_HAVE_compare_double_and_swap_double_full
+
+# elif defined(AO_ASM_X64_AVAILABLE)
+
+  /* If there is no intrinsic _InterlockedCompareExchange128 then we
+   * need basically what's given below.
+   */
 AO_INLINE int
 AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
                                        AO_t old_val1, AO_t old_val2,
                                        AO_t new_val1, AO_t new_val2)
 {
-   char result;
    __asm
    {
-        mov rdx,QWORD PTR [old_val]
-        mov rax,QWORD PTR [old_val + 8]
-        mov rcx,QWORD PTR [new_val]
-        mov rbx,QWORD PTR [new_val + 8]
-        lock cmpxchg16b [addr]
-        setz result;
+        mov rdx,QWORD PTR [old_val2] ;
+        mov rax,QWORD PTR [old_val1] ;
+        mov rcx,QWORD PTR [new_val2] ;
+        mov rbx,QWORD PTR [new_val1] ;
+        lock cmpxchg16b [addr] ;
+        setz rax ;
    }
-   return result;
 }
-#endif // AO_CASDOUBLE_MISSING
-#define AO_HAVE_compare_double_and_swap_double_full
-#endif /* 0 */
+
+# define AO_HAVE_compare_double_and_swap_double_full
+
+# endif /* _MSC_VER >= 1500 || AO_ASM_X64_AVAILABLE */
+#endif /* AO_CMPXCHG16B_AVAILABLE */

diff --git a/src/atomic_ops/sysdeps/standard_ao_double_t.h b/src/atomic_ops/sysdeps/standard_ao_double_t.h
index 22e8160..1b52d2d 100644
--- a/src/atomic_ops/sysdeps/standard_ao_double_t.h
+++ b/src/atomic_ops/sysdeps/standard_ao_double_t.h
@@ -4,27 +4,16 @@
  * to align it on 16 byte boundary (as required by cmpxchg16.
  * Similar things could be done for PowerPC 64bit using a VMX data type... */
 
-#if defined(__GNUC__)
-# if defined(__x86_64__)
-#  include <xmmintrin.h>
-   typedef __m128 double_ptr_storage;
-#  define AO_HAVE_DOUBLE_PTR_STORAGE
-# endif /* __x86_64__ */
+#if (defined(__x86_64__) && defined(__GNUC__)) || defined(_WIN64)
+# include <xmmintrin.h>
+  typedef __m128 double_ptr_storage;
+#elif defined(_WIN32) && !defined(__GNUC__)
+  typedef unsigned __int64 double_ptr_storage;
+#else
+  typedef unsigned long long double_ptr_storage;
 #endif
 
-#ifdef _MSC_VER
-# ifdef _WIN64
-   typedef __m128 double_ptr_storage;
-#  define AO_HAVE_DOUBLE_PTR_STORAGE
-# elif _WIN32
-   typedef unsigned __int64 double_ptr_storage;
-#  define AO_HAVE_DOUBLE_PTR_STORAGE
-# endif
-#endif
-
-#ifndef AO_HAVE_DOUBLE_PTR_STORAGE
-  typedef unsigned long long double_ptr_storage;
-#endif
+# define AO_HAVE_DOUBLE_PTR_STORAGE
 
 typedef union {
     double_ptr_storage AO_whole;
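
For context, here is a minimal usage sketch of the double-width CAS whose operand order is corrected above; it is not part of the commit. It assumes the public <atomic_ops.h> entry point and the AO_val1/AO_val2 accessors that standard_ao_double_t.h defines just after the fragment shown (AO_val1 being the low half and AO_val2 the high half of AO_double_t, which is exactly the ordering this patch fixes); the names "versioned" and "set_versioned" are invented for the example.

/* Illustrative sketch only: advance a {payload, version} pair in one
 * atomic step so the version tag defeats ABA.  Assumes <atomic_ops.h>
 * and the AO_val1 (low half) / AO_val2 (high half) accessors; the
 * identifiers "versioned" and "set_versioned" are made up here. */
#include <atomic_ops.h>

#ifdef AO_HAVE_compare_double_and_swap_double_full

static volatile AO_double_t versioned; /* AO_val1 = payload, AO_val2 = version */

void set_versioned(AO_t new_payload)
{
  AO_t old_payload, old_version;

  do {
    /* Plain reads suffice: if the pair is read torn, the CAS below
     * simply fails and the loop retries with fresh values. */
    old_payload = versioned.AO_val1;
    old_version = versioned.AO_val2;
  } while (!AO_compare_double_and_swap_double_full(&versioned,
                     old_payload, old_version,       /* expected (low, high) */
                     new_payload, old_version + 1)); /* desired  (low, high) */
}

#endif /* AO_HAVE_compare_double_and_swap_double_full */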