]> granicus.if.org Git - libatomic_ops/commitdiff
diff108_cvs
author ivmai <ivmai>
Thu, 10 Sep 2009 14:12:12 +0000 (14:12 +0000)
committer Ivan Maidanski <ivmai@mail.ru>
Mon, 25 Jul 2011 12:03:24 +0000 (16:03 +0400)
ChangeLog
src/atomic_ops/sysdeps/gcc/x86_64.h
src/atomic_ops/sysdeps/msftc/x86_64.h
src/atomic_ops/sysdeps/standard_ao_double_t.h

index 561a631132f5e76c0828c7ea21291d0ffd8d5482..0ec61b5cf3bfbc565d672a93f94db1fa3e16476f 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,46 @@
+2009-08-06 Ivan Maidanski <ivmai@mail.ru>
+       * src/atomic_ops/sysdeps/gcc/x86_64.h: Remove comments about i486
+       and 32-bit WinChips.
+       * src/atomic_ops/sysdeps/msftc/x86_64.h: Ditto.
+       * src/atomic_ops/sysdeps/gcc/x86_64.h (AO_nop_full): Replace
+       K&R-style function definition with ANSI C one.
+       * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_nop_full): Ditto.
+       * src/atomic_ops/sysdeps/gcc/x86_64.h
+       (AO_compare_double_and_swap_double_full): Fix comment.
+       * src/atomic_ops/sysdeps/gcc/x86_64.h
+       (AO_compare_double_and_swap_double_full): Swap all "val1" and "val2"
+       variables ("val1" is the lowest part of AO_double_t).
+       * src/atomic_ops/sysdeps/msftc/x86_64.h
+       (AO_compare_double_and_swap_double_full): Ditto.
+       * src/atomic_ops/sysdeps/msftc/x86_64.h: Remove comment about
+       ASSUME_WINDOWS98.
+       * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_ASM_X64_AVAILABLE): New
+       macro.
+       * src/atomic_ops/sysdeps/msftc/x86_64.h: Include
+       "test_and_set_t_is_char.h" if AO_ASM_X64_AVAILABLE (same as in
+       x86_64.h for gcc); remove FIXME (about re-implementing test-and-set).
+       * src/atomic_ops/sysdeps/msftc/x86_64.h: Include
+       "standard_ao_double_t.h" (same as in x86_64.h for gcc).
+       * src/atomic_ops/sysdeps/msftc/x86_64.h: Add comment for include
+       <intrin.h> assuming at least VC++ v8.
+       * src/atomic_ops/sysdeps/msftc/x86_64.h: Remove _Interlocked
+       prototypes (since they are always declared in intrin.h).
+       * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_nop_full): Move its
+       definition below CAS primitive (to textually group all asm-based
+       primitives together).
+       * src/atomic_ops/sysdeps/msftc/x86_64.h (AO_test_and_set_full):
+       Implement for AO_ASM_X64_AVAILABLE case.
+       * src/atomic_ops/sysdeps/msftc/x86_64.h: Remove AO_CASDOUBLE_MISSING
+       macro (replaced with AO_ASM_X64_AVAILABLE).
+       * src/atomic_ops/sysdeps/msftc/x86_64.h
+       (AO_compare_double_and_swap_double_full): Add intrinsic-based
+       implementation for VC++ v9+.
+       * src/atomic_ops/sysdeps/standard_ao_double_t.h: Include
+       <xmmintrin.h> (and use "__m128" type) if _WIN64.
+       * src/atomic_ops/sysdeps/standard_ao_double_t.h
+       (AO_HAVE_DOUBLE_PTR_STORAGE): Define it always (as
+       "double_ptr_storage" is defined for all cases).
+
 2009-09-09 Hans Boehm <Hans.Boehm@hp.com> (Really mostly Patrick Marlier)
        * src/atomic_ops/sysdeps/gcc/sparc.h (NO_SPARC_V9):
        Renamed to AO_NO_SPARC_V9.
index f2522b4ba90c0cc82cabba5cad6e5c2ad92510c0..36085c4c4c5b1e3387a851bff50c53bfac01dba2 100644 (file)
  * Some of the machine specific code was borrowed from our GC distribution.
  */
 
-/* The following really assume we have a 486 or better.  Unfortunately */
-/* gcc doesn't define a suitable feature test macro based on command   */
-/* line options.                                                       */
-/* We should perhaps test dynamically.                                 */
-
 #include "../all_aligned_atomic_load_store.h"
 
-/* Real X86 implementations, except for some old WinChips, appear      */
+/* Real X86 implementations appear                                     */
 /* to enforce ordering between memory operations, EXCEPT that a later  */
 /* read can pass earlier writes, presumably due to the visible         */
 /* presence of store buffers.                                          */
-/* We ignore both the WinChips, and the fact that the official specs   */
+/* We ignore the fact that the official specs                          */
 /* seem to be much weaker (and arguably too weak to be usable).                */
 
 #include "../ordered_except_wr.h"
@@ -38,7 +33,7 @@
 
 #if defined(AO_USE_PENTIUM4_INSTRS)
 /* Full memory barrier: executes "mfence"; the "memory" clobber keeps  */
 /* the compiler from moving memory accesses across the asm statement.  */
 AO_INLINE void
-AO_nop_full()
+AO_nop_full(void)
 {
   __asm__ __volatile__("mfence" : : : "memory");
 }
@@ -56,7 +51,6 @@ AO_nop_full()
 /* As far as we can tell, the lfence and sfence instructions are not   */
 /* currently needed or useful for cached memory accesses.              */
 
-/* Really only works for 486 and later */
 AO_INLINE AO_t
 AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
 {
@@ -109,7 +103,6 @@ AO_int_fetch_and_add_full (volatile unsigned int *p, unsigned int incr)
 
 #define AO_HAVE_int_fetch_and_add_full
 
-/* Really only works for 486 and later */
 AO_INLINE void
 AO_or_full (volatile AO_t *p, AO_t incr)
 {
@@ -148,8 +141,9 @@ AO_compare_and_swap_full(volatile AO_t *addr,
 
 #ifdef AO_CMPXCHG16B_AVAILABLE
 /* NEC LE-IT: older AMD Opterons are missing this instruction.
- * On these machines SIGILL will be thrown. Define AO_CASDOUBLE_MISSING
- * to have an emulated (lock based) version available */ 
+ * On these machines SIGILL will be thrown.
+ * Define AO_WEAK_DOUBLE_CAS_EMULATION to have an emulated
+ * (lock-based) version available. */
 /* HB: Changed this to not define either by default.  There are
  * enough machines and tool chains around on which cmpxchg16b
  * doesn't work.  And the emulation is unsafe by our usual rules.
@@ -164,10 +158,10 @@ AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
   __asm__ __volatile__("lock; cmpxchg16b %0; setz %1"
                                : "=m"(*addr), "=q"(result)
                                        : "m"(*addr),
-                                         "d" (old_val1),
-                                         "a" (old_val2),
-                                         "c" (new_val1),
-                                         "b" (new_val2)  : "memory");
+                                         "d" (old_val2),
+                                         "a" (old_val1),
+                                         "c" (new_val2),
+                                         "b" (new_val1)  : "memory");
   return (int) result;
 }
 #define AO_HAVE_compare_double_and_swap_double_full
index ca1a682e1e64eb7a6cbcbb0e2f464b26586e59a6..fb0b0ee11b98e80d120a66965a906b3bc7422316 100644 (file)
  * SOFTWARE. 
  */
 
-/* The following really assume we have a 486 or better. */
-/* If ASSUME_WINDOWS98 is defined, we assume Windows 98 or newer.      */
-
 #include "../all_aligned_atomic_load_store.h"
 
-/* Real X86 implementations, except for some old WinChips, appear      */
+/* Real X86 implementations appear                                     */
 /* to enforce ordering between memory operations, EXCEPT that a later  */
 /* read can pass earlier writes, presumably due to the visible         */
 /* presence of store buffers.                                          */
-/* We ignore both the WinChips, and the fact that the official specs   */
+/* We ignore the fact that the official specs                          */
 /* seem to be much weaker (and arguably too weak to be usable).                */
 
 #include "../ordered_except_wr.h"
 
-#if 0
-FIXME: Need to reimplement testandset
-
-#include "../test_and_set_t_is_char.h"
-
+#ifdef AO_ASM_X64_AVAILABLE
+# include "../test_and_set_t_is_char.h"
 #else
-
-#include "../test_and_set_t_is_ao_t.h"
-
+# include "../test_and_set_t_is_ao_t.h"
 #endif
 
+#include "../standard_ao_double_t.h"
+
 #include <windows.h>
        /* Seems like over-kill, but that's what MSDN recommends.       */
        /* And apparently winbase.h is not always self-contained.       */
 
-
+/* Assume _MSC_VER >= 1400 */
 #include <intrin.h>
 
 #pragma intrinsic (_ReadWriteBarrier)
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-LONGLONG __cdecl _InterlockedIncrement64(LONGLONG volatile *Addend);
-LONGLONG __cdecl _InterlockedDecrement64(LONGLONG volatile *Addend);
-LONGLONG __cdecl _InterlockedExchangeAdd64(LONGLONG volatile* Target,
-                                          LONGLONG Addend);
-LONGLONG __cdecl _InterlockedExchange64(LONGLONG volatile* Target,
-                                       LONGLONG Value);
-LONGLONG __cdecl _InterlockedCompareExchange64(LONGLONG volatile* Dest,
-                                               LONGLONG Exchange,
-                                              LONGLONG Comp);
-
-#ifdef __cplusplus
-}
-#endif
-
 #pragma intrinsic (_InterlockedIncrement64)
 #pragma intrinsic (_InterlockedDecrement64)
 #pragma intrinsic (_InterlockedExchange64)
 #pragma intrinsic (_InterlockedExchangeAdd64)
 #pragma intrinsic (_InterlockedCompareExchange64)
 
-/* As far as we can tell, the lfence and sfence instructions are not   */
-/* currently needed or useful for cached memory accesses.              */
-
-/* Unfortunately mfence doesn't exist everywhere.              */
-/* IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) is                */
-/* probably a conservative test for it?                                */
-
-#if defined(AO_USE_PENTIUM4_INSTRS)
-
-AO_INLINE void
-AO_nop_full()
-{
-  __asm { mfence }
-}
-
-#define AO_HAVE_nop_full
-
-#else
-
-/* We could use the cpuid instruction.  But that seems to be slower    */
-/* than the default implementation based on test_and_set_full.  Thus   */
-/* we omit that bit of misinformation here.                            */
-
-#endif
-
 AO_INLINE AO_t
 AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
 {
@@ -138,49 +89,96 @@ AO_compare_and_swap_full(volatile AO_t *addr,
 
 #define AO_HAVE_compare_and_swap_full
 
-#if 0
-FIXME: (__asm not supported)
+/* As far as we can tell, the lfence and sfence instructions are not   */
+/* currently needed or useful for cached memory accesses.              */
+
+/* Unfortunately mfence doesn't exist everywhere.              */
+/* IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) is                */
+/* probably a conservative test for it?                                */
+
+#if defined(AO_USE_PENTIUM4_INSTRS)
+/* Full memory barrier: executes the mfence instruction via inline
+ * __asm.
+ * NOTE(review): Microsoft's x64 compilers are documented as not
+ * accepting inline __asm, yet this definition is guarded only by
+ * AO_USE_PENTIUM4_INSTRS (not by AO_ASM_X64_AVAILABLE) -- confirm.
+ */
+AO_INLINE void
+AO_nop_full(void)
+{
+  __asm { mfence }
+}
+
+#define AO_HAVE_nop_full
+
+#else
+
+/* We could use the cpuid instruction.  But that seems to be slower    */
+/* than the default implementation based on test_and_set_full.  Thus   */
+/* we omit that bit of misinformation here.                            */
+
+#endif
+
+#ifdef AO_ASM_X64_AVAILABLE
+
 /* Test-and-set: exchanges the byte at addr with AO_TS_SET using xchg. */
 /* There is no C return statement; presumably the old byte left in al  */
 /* serves as the return value -- TODO confirm for the target compiler. */
 AO_INLINE AO_TS_VAL_t
 AO_test_and_set_full(volatile AO_TS_t *addr)
 {
     __asm
     {
-       mov     eax,AO_TS_SET           ;
-       mov     ebx,addr                ;
-       xchg    byte ptr [ebx],al       ;
+       mov     rax,AO_TS_SET           ;
+       mov     rbx,addr                ;
+       xchg    byte ptr [rbx],al       ;
     }
 }
 
 #define AO_HAVE_test_and_set_full
 
-FIXME: (__asm not supported)
-NEC LE-IT: Don't have a working Win64 environment here at the moment.
-AO_compare_double_and_swap_double_full needs implementation for Win64
-But there is no _InterlockedCompareExchange128 in the WinAPI, so we
-need basically whats given below.
-Also see gcc/x86_64.h for partial old opteron workaround:
+#endif /* AO_ASM_X64_AVAILABLE */
+
+#ifdef AO_CMPXCHG16B_AVAILABLE
 
-#ifndef AO_CASDOUBLE_MISSING
+/* AO_compare_double_and_swap_double_full needs implementation for Win64.
+ * Also see ../gcc/x86_64.h for partial old Opteron workaround.
+ */
+
+# if _MSC_VER >= 1500
+
+#pragma intrinsic (_InterlockedCompareExchange128)
+/* Double-width compare-and-swap built on the
+ * _InterlockedCompareExchange128 intrinsic.
+ * The expected value is passed as a two-element array (element 0 is
+ * old_val1, the low half; element 1 is old_val2, the high half); the
+ * replacement is passed as separate high (new_val2) and low (new_val1)
+ * arguments.  Returns the intrinsic's result converted to int
+ * (presumably nonzero iff the exchange took place -- confirm against
+ * the intrinsic's documentation).
+ */
+AO_INLINE int
+AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
+                                      AO_t old_val1, AO_t old_val2,
+                                      AO_t new_val1, AO_t new_val2)
+{
+   __int64 comparandResult[2];
+   comparandResult[0] = old_val1; /* low */
+   comparandResult[1] = old_val2; /* high */
+   return _InterlockedCompareExchange128((volatile __int64 *)addr,
+               new_val2 /* high */, new_val1 /* low */, comparandResult);
+}
+
+#   define AO_HAVE_compare_double_and_swap_double_full
+
+# elif defined(AO_ASM_X64_AVAILABLE)
+
+ /* If there is no intrinsic _InterlockedCompareExchange128 then we
+  * need basically what's given below.
+  */
 
 /* Double-width compare-and-swap via "lock cmpxchg16b": the expected   */
 /* value is loaded into rdx:rax (old_val2:old_val1) and the            */
 /* replacement into rcx:rbx (new_val2:new_val1).                       */
 /* NOTE(review): setcc takes an 8-bit operand, so "setz rax" looks     */
 /* wrong; "[addr]" may name the pointer variable rather than the       */
 /* pointed-to data; and there is no C return statement -- confirm.     */
 AO_INLINE int
 AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
                                        AO_t old_val1, AO_t old_val2,
                                        AO_t new_val1, AO_t new_val2)
 {
-       char result;
        __asm
        {
-               mov     rdx,QWORD PTR [old_val]
-               mov     rax,QWORD PTR [old_val + 8]
-               mov     rcx,QWORD PTR [new_val]
-               mov     rbx,QWORD PTR [new_val + 8]
-               lock cmpxchg16b [addr]
-               setz result;
+               mov     rdx,QWORD PTR [old_val2]        ;
+               mov     rax,QWORD PTR [old_val1]        ;
+               mov     rcx,QWORD PTR [new_val2]        ;
+               mov     rbx,QWORD PTR [new_val1]        ;
+               lock cmpxchg16b [addr]                  ;
+               setz    rax                             ;
        }
-       return result;
 }
-#endif // AO_CASDOUBLE_MISSING
-#define AO_HAVE_compare_double_and_swap_double_full
 
-#endif /* 0 */
+#   define AO_HAVE_compare_double_and_swap_double_full
+
+# endif /* _MSC_VER >= 1500 || AO_ASM_X64_AVAILABLE */
 
+#endif /* AO_CMPXCHG16B_AVAILABLE */
index 22e8160c86358714a8fe32494a08edb241dd6722..1b52d2d8acdffe3ab02890a672c13eb83a93304d 100644 (file)
@@ -4,27 +4,16 @@
 *         to align it on a 16-byte boundary (as required by cmpxchg16b).
 * Similar things could be done for PowerPC 64bit using a VMX data type...      */
 
-#if defined(__GNUC__)
-# if defined(__x86_64__)
-# include<xmmintrin.h>
-   typedef __m128 double_ptr_storage;
-#  define AO_HAVE_DOUBLE_PTR_STORAGE
-# endif /* __x86_64__ */
+#if (defined(__x86_64__) && defined(__GNUC__)) || defined(_WIN64)
+# include <xmmintrin.h>
+  typedef __m128 double_ptr_storage; /* x86_64 with GCC, or any Win64 build */
+#elif defined(_WIN32) && !defined(__GNUC__)
+  typedef unsigned __int64 double_ptr_storage; /* 32-bit Windows, non-GNU compiler */
+#else
+  typedef unsigned long long double_ptr_storage; /* every other target */
 #endif
 
-#ifdef _MSC_VER
-# ifdef _WIN64
-   typedef __m128 double_ptr_storage;
-#  define AO_HAVE_DOUBLE_PTR_STORAGE
-# elif _WIN32
-   typedef unsigned __int64 double_ptr_storage;
-#  define AO_HAVE_DOUBLE_PTR_STORAGE
-# endif
-#endif
-
-#ifndef AO_HAVE_DOUBLE_PTR_STORAGE
-   typedef unsigned long long double_ptr_storage;
-#endif
+# define AO_HAVE_DOUBLE_PTR_STORAGE
 
 typedef union {
     double_ptr_storage AO_whole;