+2009-08-06 Ivan Maidanski <ivmai@mail.ru>
+ * src/atomic_ops/sysdeps/gcc/x86_64.h: Remove comments about i486
+ and 32-bit WinChips.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Ditto.
+ * src/atomic_ops/sysdeps/gcc/x86_64.h (AO_nop_full): Replace
+ K&R-style function definition with ANSI C one.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_nop_full): Ditto.
+ * src/atomic_ops/sysdeps/gcc/x86_64.h
+ (AO_compare_double_and_swap_double_full): Fix comment.
+ * src/atomic_ops/sysdeps/gcc/x86_64.h
+ (AO_compare_double_and_swap_double_full): Swap all "val1" and "val2"
+ variables ("val1" is the lowest part of AO_double_t).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h
+ (AO_compare_double_and_swap_double_full): Ditto.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Remove comment about
+ ASSUME_WINDOWS98.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_ASM_X64_AVAILABLE): New
+ macro.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Include
+ "test_and_set_t_is_char.h" if AO_ASM_X64_AVAILABLE (same as in
+ x86_64.h for gcc); remove FIXME (about re-implementing test-and-set).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Include
+ "standard_ao_double_t.h" (same as in x86_64.h for gcc).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Add comment for include
+ <intrin.h> assuming at least VC++ v8.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Remove _Interlocked
+ prototypes (since they are always declared in intrin.h).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_nop_full): Move its
+ definition below CAS primitive (to textually group all asm-based
+ primitives together).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_test_and_set_full):
+ Implement for AO_ASM_X64_AVAILABLE case.
+ * src/atomic_ops/sysdeps/msftc/x86_64.h: Remove AO_CASDOUBLE_MISSING
+ macro (replaced with AO_ASM_X64_AVAILABLE).
+ * src/atomic_ops/sysdeps/msftc/x86_64.h
+ (AO_compare_double_and_swap_double_full): Add intrinsic-based
+ implementation for VC++ v9+.
+ * src/atomic_ops/sysdeps/standard_ao_double_t.h: Include
+ <xmmintrin.h> (and use "__m128" type) if _WIN64.
+ * src/atomic_ops/sysdeps/standard_ao_double_t.h
+ (AO_HAVE_DOUBLE_PTR_STORAGE): Define it always (as
+ "double_ptr_storage" is defined for all cases).
+
2009-09-09 Hans Boehm <Hans.Boehm@hp.com> (Really mostly Patrick Marlier)
* src/atomic_ops/sysdeps/gcc/sparc.h (NO_SPARC_V9):
Renamed to AO_NO_SPARC_V9.
* Some of the machine specific code was borrowed from our GC distribution.
*/
-/* The following really assume we have a 486 or better. Unfortunately */
-/* gcc doesn't define a suitable feature test macro based on command */
-/* line options. */
-/* We should perhaps test dynamically. */
-
#include "../all_aligned_atomic_load_store.h"
-/* Real X86 implementations, except for some old WinChips, appear */
+/* Real X86 implementations appear */
/* to enforce ordering between memory operations, EXCEPT that a later */
/* read can pass earlier writes, presumably due to the visible */
/* presence of store buffers. */
-/* We ignore both the WinChips, and the fact that the official specs */
+/* We ignore the fact that the official specs */
/* seem to be much weaker (and arguably too weak to be usable). */
#include "../ordered_except_wr.h"
#if defined(AO_USE_PENTIUM4_INSTRS)
AO_INLINE void
-AO_nop_full()
+AO_nop_full(void)
{
__asm__ __volatile__("mfence" : : : "memory");
}
/* As far as we can tell, the lfence and sfence instructions are not */
/* currently needed or useful for cached memory accesses. */
-/* Really only works for 486 and later */
AO_INLINE AO_t
AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
{
#define AO_HAVE_int_fetch_and_add_full
-/* Really only works for 486 and later */
AO_INLINE void
AO_or_full (volatile AO_t *p, AO_t incr)
{
#ifdef AO_CMPXCHG16B_AVAILABLE
/* NEC LE-IT: older AMD Opterons are missing this instruction.
- * On these machines SIGILL will be thrown. Define AO_CASDOUBLE_MISSING
- * to have an emulated (lock based) version available */
+ * On these machines SIGILL will be thrown.
+ * Define AO_WEAK_DOUBLE_CAS_EMULATION to have an emulated
+ * (lock based) version available */
/* HB: Changed this to not define either by default. There are
* enough machines and tool chains around on which cmpxchg16b
* doesn't work. And the emulation is unsafe by our usual rules.
__asm__ __volatile__("lock; cmpxchg16b %0; setz %1"
: "=m"(*addr), "=q"(result)
: "m"(*addr),
- "d" (old_val1),
- "a" (old_val2),
- "c" (new_val1),
- "b" (new_val2) : "memory");
+ "d" (old_val2),
+ "a" (old_val1),
+ "c" (new_val2),
+ "b" (new_val1) : "memory");
return (int) result;
}
#define AO_HAVE_compare_double_and_swap_double_full
* SOFTWARE.
*/
-/* The following really assume we have a 486 or better. */
-/* If ASSUME_WINDOWS98 is defined, we assume Windows 98 or newer. */
-
#include "../all_aligned_atomic_load_store.h"
-/* Real X86 implementations, except for some old WinChips, appear */
+/* Real X86 implementations appear */
/* to enforce ordering between memory operations, EXCEPT that a later */
/* read can pass earlier writes, presumably due to the visible */
/* presence of store buffers. */
-/* We ignore both the WinChips, and the fact that the official specs */
+/* We ignore the fact that the official specs */
/* seem to be much weaker (and arguably too weak to be usable). */
#include "../ordered_except_wr.h"
-#if 0
-FIXME: Need to reimplement testandset
-
-#include "../test_and_set_t_is_char.h"
-
+#ifdef AO_ASM_X64_AVAILABLE
+# include "../test_and_set_t_is_char.h"
#else
-
-#include "../test_and_set_t_is_ao_t.h"
-
+# include "../test_and_set_t_is_ao_t.h"
#endif
+#include "../standard_ao_double_t.h"
+
#include <windows.h>
/* Seems like over-kill, but that's what MSDN recommends. */
/* And apparently winbase.h is not always self-contained. */
-
+/* Assume _MSC_VER >= 1400 */
#include <intrin.h>
#pragma intrinsic (_ReadWriteBarrier)
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-LONGLONG __cdecl _InterlockedIncrement64(LONGLONG volatile *Addend);
-LONGLONG __cdecl _InterlockedDecrement64(LONGLONG volatile *Addend);
-LONGLONG __cdecl _InterlockedExchangeAdd64(LONGLONG volatile* Target,
- LONGLONG Addend);
-LONGLONG __cdecl _InterlockedExchange64(LONGLONG volatile* Target,
- LONGLONG Value);
-LONGLONG __cdecl _InterlockedCompareExchange64(LONGLONG volatile* Dest,
- LONGLONG Exchange,
- LONGLONG Comp);
-
-#ifdef __cplusplus
-}
-#endif
-
#pragma intrinsic (_InterlockedIncrement64)
#pragma intrinsic (_InterlockedDecrement64)
#pragma intrinsic (_InterlockedExchange64)
#pragma intrinsic (_InterlockedExchangeAdd64)
#pragma intrinsic (_InterlockedCompareExchange64)
-/* As far as we can tell, the lfence and sfence instructions are not */
-/* currently needed or useful for cached memory accesses. */
-
-/* Unfortunately mfence doesn't exist everywhere. */
-/* IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) is */
-/* probably a conservative test for it? */
-
-#if defined(AO_USE_PENTIUM4_INSTRS)
-
-AO_INLINE void
-AO_nop_full()
-{
- __asm { mfence }
-}
-
-#define AO_HAVE_nop_full
-
-#else
-
-/* We could use the cpuid instruction. But that seems to be slower */
-/* than the default implementation based on test_and_set_full. Thus */
-/* we omit that bit of misinformation here. */
-
-#endif
-
AO_INLINE AO_t
AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
{
#define AO_HAVE_compare_and_swap_full
-#if 0
-FIXME: (__asm not supported)
+/* As far as we can tell, the lfence and sfence instructions are not */
+/* currently needed or useful for cached memory accesses. */
+
+/* Unfortunately mfence doesn't exist everywhere. */
+/* IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) is */
+/* probably a conservative test for it? */
+
+#if defined(AO_USE_PENTIUM4_INSTRS)
+
+AO_INLINE void
+AO_nop_full(void)
+{
+ __asm { mfence }
+}
+
+#define AO_HAVE_nop_full
+
+#else
+
+/* We could use the cpuid instruction. But that seems to be slower */
+/* than the default implementation based on test_and_set_full. Thus */
+/* we omit that bit of misinformation here. */
+
+#endif
+
+#ifdef AO_ASM_X64_AVAILABLE
+
AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr)
{
__asm
{
- mov eax,AO_TS_SET ;
- mov ebx,addr ;
- xchg byte ptr [ebx],al ;
+ mov rax,AO_TS_SET ;
+ mov rbx,addr ;
+ xchg byte ptr [rbx],al ;
}
}
#define AO_HAVE_test_and_set_full
-FIXME: (__asm not supported)
-NEC LE-IT: Don't have a working Win64 environment here at the moment.
-AO_compare_double_and_swap_double_full needs implementation for Win64
-But there is no _InterlockedCompareExchange128 in the WinAPI, so we
-need basically whats given below.
-Also see gcc/x86_64.h for partial old opteron workaround:
+#endif /* AO_ASM_X64_AVAILABLE */
+
+#ifdef AO_CMPXCHG16B_AVAILABLE
-#ifndef AO_CASDOUBLE_MISSING
+/* AO_compare_double_and_swap_double_full is implemented below for Win64.
+ * Also see ../gcc/x86_64.h for partial old Opteron workaround.
+ */
+
+# if _MSC_VER >= 1500
+
+#pragma intrinsic (_InterlockedCompareExchange128)
+
+AO_INLINE int
+AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
+ AO_t old_val1, AO_t old_val2,
+ AO_t new_val1, AO_t new_val2)
+{
+ __int64 comparandResult[2];
+ comparandResult[0] = old_val1; /* low */
+ comparandResult[1] = old_val2; /* high */
+ return _InterlockedCompareExchange128((volatile __int64 *)addr,
+ new_val2 /* high */, new_val1 /* low */, comparandResult);
+}
+
+# define AO_HAVE_compare_double_and_swap_double_full
+
+# elif defined(AO_ASM_X64_AVAILABLE)
+
+ /* If there is no intrinsic _InterlockedCompareExchange128 then we
+ * need basically what's given below.
+ */
AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
AO_t old_val1, AO_t old_val2,
AO_t new_val1, AO_t new_val2)
{
- char result;
__asm
{
- mov rdx,QWORD PTR [old_val]
- mov rax,QWORD PTR [old_val + 8]
- mov rcx,QWORD PTR [new_val]
- mov rbx,QWORD PTR [new_val + 8]
- lock cmpxchg16b [addr]
- setz result;
+ mov rdx,QWORD PTR [old_val2] ;
+ mov rax,QWORD PTR [old_val1] ;
+ mov rcx,QWORD PTR [new_val2] ;
+ mov rbx,QWORD PTR [new_val1] ;
+ lock cmpxchg16b [addr] ;
+ setz al ;
+ movzx rax,al ;
}
- return result;
}
-#endif // AO_CASDOUBLE_MISSING
-#define AO_HAVE_compare_double_and_swap_double_full
-#endif /* 0 */
+# define AO_HAVE_compare_double_and_swap_double_full
+
+# endif /* _MSC_VER >= 1500 || AO_ASM_X64_AVAILABLE */
+#endif /* AO_CMPXCHG16B_AVAILABLE */