granicus.if.org Git - libatomic_ops/commitdiff
2008-01-06 Hans Boehm <Hans.Boehm@hp.com> (Really mostly Joerg Wagner)
author	hboehm <hboehm>
Mon, 7 Jan 2008 05:11:52 +0000 (05:11 +0000)
committer	Ivan Maidanski <ivmai@mail.ru>
Mon, 25 Jul 2011 12:03:24 +0000 (16:03 +0400)
* src/atomic_ops/generalize.h: Add test_and_set generalizations;
add AO_double_compare_and_swap generalizations.
* src/atomic_ops/sysdeps/armcc/arm_v6.h: New file.
* src/atomic_ops/sysdeps/gcc/arm.h: Handle V6 and V7.
* src/atomic_ops/sysdeps/gcc/x86.h,
src/atomic_ops/sysdeps/{gcc,msftc}/x86_64.h: Conditionally add
compare_double_and_swap_double, commented out for msftc.
* src/atomic_ops/sysdeps/standard_ao_double_t.h:  Add
double_ptr_storage field.

ChangeLog
src/atomic_ops/generalize.h
src/atomic_ops/sysdeps/armcc/arm_v6.h [new file with mode: 0644]
src/atomic_ops/sysdeps/gcc/arm.h
src/atomic_ops/sysdeps/gcc/x86_64.h
src/atomic_ops/sysdeps/msftc/x86_64.h
src/atomic_ops/sysdeps/standard_ao_double_t.h

index ce3afe9950843b0cedc3110e03c4c4ba6672bc12..043b4844cd497e0390727d1c797900c3213b8b29 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2008-01-06 Hans Boehm <Hans.Boehm@hp.com> (Really mostly Joerg Wagner)
+       * src/atomic_ops/generalize.h: Add test_and_set generalizations;
+       add AO_double_compare_and_swap generalizations.
+       * src/atomic_ops/sysdeps/armcc/arm_v6.h: New file.
+       * src/atomic_ops/sysdeps/gcc/arm.h: Handle V6 and V7.
+       * src/atomic_ops/sysdeps/gcc/x86.h,
+       src/atomic_ops/sysdeps/{gcc,msftc}/x86_64.h: Conditionally add
+       compare_double_and_swap_double, commented out for msftc.
+       * src/atomic_ops/sysdeps/standard_ao_double_t.h:  Add
+       double_ptr_storage field.
+
 2008-01-03 Hans Boehm <Hans.Boehm@hp.com>
        (Merge from separate atomic_ops tree)
        * src/atomic_ops/sysdeps/gcc/x86.h: Define correct macro for
index 0f42b328d1bd6e24176d9b8451b586fd1f2c6102..bfcf2d7ba227a9ee6f47932e82a36a7874bc90c1 100644 (file)
 #  define AO_HAVE_store_full
 #endif
 
+/* NEC LE-IT: Test and set */
+#if defined(AO_HAVE_test_and_set) && \
+       defined(AO_HAVE_nop_full) && \
+    !defined(AO_HAVE_test_and_set_release)
+#      define AO_test_and_set_release(addr) \
+       (AO_nop_full(), AO_test_and_set(addr))
+#  define AO_HAVE_test_and_set_release
+#endif
+
+#if defined(AO_HAVE_test_and_set) && \
+       defined(AO_HAVE_nop_full) && \
+    !defined(AO_HAVE_test_and_set_acquire)
+AO_INLINE AO_TS_t
+AO_test_and_set_acquire(volatile AO_TS_t *addr)
+{
+       AO_TS_t res = AO_test_and_set(addr);
+       AO_nop_full();
+       return res; 
+}  
+#  define AO_HAVE_test_and_set_acquire
+#endif
+
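Together, the two generalizations above give any port that defines only
AO_test_and_set and AO_nop_full the fenced variants for free. A minimal
sketch of the classic client, a test-and-set spinlock (illustration only,
not part of this commit; AO_TS_INITIALIZER, AO_TS_SET and AO_CLEAR come
from the generic atomic_ops machinery, and the function names are invented):

#include "atomic_ops.h"

static AO_TS_t my_lock = AO_TS_INITIALIZER;

void my_lock_acquire(void)
{
  /* Acquire semantics: nothing in the critical section can be  */
  /* reordered before the successful test-and-set.              */
  while (AO_test_and_set_acquire(&my_lock) == AO_TS_SET) {
    /* spin: AO_TS_SET means another thread holds the lock */
  }
}

void my_lock_release(void)
{
  AO_CLEAR(&my_lock);   /* release store of AO_TS_CLEAR */
}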
   
 /* Fetch_and_add */
 /* We first try to implement fetch_and_add variants in terms   */
 #    define AO_HAVE_compare_and_swap_double_dd_acquire_read
 #  endif
 #endif
+
+/* NEC LE-IT: Convenience functions for AO_double compare and swap,    */
+/* which are easier to type and read in client code.                   */
+#if defined(AO_HAVE_compare_double_and_swap_double_release) && \
+    !defined(AO_HAVE_double_compare_and_swap_release)
+AO_INLINE int
+AO_double_compare_and_swap_release(volatile AO_double_t *addr,
+                                  AO_double_t old_val, AO_double_t new_val) 
+{
+       return AO_compare_double_and_swap_double_release(addr,
+                                                        old_val.AO_val1, old_val.AO_val2,
+                                                        new_val.AO_val1, new_val.AO_val2);
+}
+#define AO_HAVE_double_compare_and_swap_release
+#endif
+
+#if defined(AO_HAVE_compare_double_and_swap_double_acquire) && \
+    !defined(AO_HAVE_double_compare_and_swap_acquire)
+AO_INLINE int
+AO_double_compare_and_swap_acquire(volatile AO_double_t *addr,
+                                  AO_double_t old_val, AO_double_t new_val) 
+{
+       return AO_compare_double_and_swap_double_acquire(addr,
+                                                        old_val.AO_val1, old_val.AO_val2,
+                                                        new_val.AO_val1, new_val.AO_val2);
+}
+#define AO_HAVE_double_compare_and_swap_acquire
+#endif
+
+#if defined(AO_HAVE_compare_double_and_swap_double_full) && \
+    !defined(AO_HAVE_double_compare_and_swap_full)
+AO_INLINE int
+AO_double_compare_and_swap_full(volatile AO_double_t *addr,
+                                        AO_double_t old_val, AO_double_t new_val) 
+{
+       return AO_compare_double_and_swap_double_full(addr,
+                                                     old_val.AO_val1, old_val.AO_val2,
+                                                     new_val.AO_val1, new_val.AO_val2);
+}
+#define AO_HAVE_double_compare_and_swap_full
+#endif
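A sketch of how the new wrappers read in client code (illustrative only;
"slot", "publish" and the ABA-tag scheme are invented for the example).
The plain read of the old value is harmless: a torn read simply makes the
subsequent double-width CAS fail and retry.

#include "atomic_ops.h"

#ifdef AO_HAVE_double_compare_and_swap_full
static volatile AO_double_t slot;   /* e.g. pointer plus ABA counter */

void publish(AO_t ptr)
{
  AO_double_t old_val, new_val;
  do {
    old_val.AO_val1 = slot.AO_val1;   /* may be torn; the CAS catches it */
    old_val.AO_val2 = slot.AO_val2;
    new_val.AO_val1 = ptr;
    new_val.AO_val2 = old_val.AO_val2 + 1;   /* bump the ABA tag */
  } while (!AO_double_compare_and_swap_full(&slot, old_val, new_val));
}
#endif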
diff --git a/src/atomic_ops/sysdeps/armcc/arm_v6.h b/src/atomic_ops/sysdeps/armcc/arm_v6.h
new file mode 100644 (file)
index 0000000..3e2318b
--- /dev/null
@@ -0,0 +1,199 @@
+/* 
+ * Copyright (c) 2007 by NEC LE-IT:              All rights reserved.
+ * A transcription of ARMv6 atomic operations for the ARM Realview Toolchain.
+ * This code works with armcc from RVDS 3.1
+ * This is based on work in gcc/arm.h by
+ *   Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
+ *   Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
+ *   Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
+ * 
+ *
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose,  provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ */
+#include "../read_ordered.h"
+#include "../test_and_set_t_is_ao_t.h" /* Probably suboptimal */
+
+#if __TARGET_ARCH_ARM < 6
+Don't use with ARM instruction sets lower than v6
+#endif
+
+/* NEC LE-IT: ARMv6 is the first architecture providing support for simple LL/SC.
+ * A data memory barrier must be raised via a CP15 command (see documentation).
+ *
+ * ARMv7 is compatible with ARMv6 but has a simpler instruction for issuing a
+ * memory barrier (DMB). Raising it via CP15 should still work, according to the
+ * support engineers. If it turns out to be much quicker, we should implement
+ * custom code for ARMv7 using the asm { dmb } instruction.
+ *
+ * If only a single processor is used, we can define AO_UNIPROCESSOR
+ * and need not access CP15 to issue a DMB at all.
+ */
+
+AO_INLINE void
+AO_nop_full()
+{
+# ifndef AO_UNIPROCESSOR
+    unsigned int dest=0;
+    /* issue a data memory barrier (keeps ordering of memory transactions      */
+    /* before and after this operation)                                        */
+       __asm { mcr p15,0,dest,c7,c10,5 } ;
+# endif
+}
+
+#define AO_HAVE_nop_full
+
+AO_INLINE AO_t
+AO_load(volatile AO_t *addr)
+{
+  /* Cast away the volatile in case it adds fence semantics.           */
+  return (*(AO_t *)addr);
+}
+#define AO_HAVE_load
+
+/* NEC LE-IT: atomic "store" - according to the ARM documentation this is
+ * the only safe way to set variables that are also used in an LL/SC environment.
+ * A direct write won't be recognized by the LL/SC construct on other CPUs.
+ *
+ * HB: Based on subsequent discussion, I think it would be OK to use an
+ * ordinary store here if we knew that interrupt handlers always cleared
+ * the reservation.  They should, but there is some doubt that this is
+ * currently always the case for e.g. Linux.
+*/
+AO_INLINE void AO_store(volatile AO_t *addr, AO_t value)
+{
+       unsigned long tmp;
+       
+retry:
+__asm {        
+               ldrex   tmp, [addr]
+               strex   tmp, value, [addr]
+               teq     tmp, #0
+               bne     retry
+         };
+}
+#define AO_HAVE_store
+
+/* NEC LE-IT: replace the SWAP as recommended by ARM:
+
+   "Applies to: ARM11 Cores
+       Though the SWP instruction will still work with ARM V6 cores, it is recommended
+       to use the new V6 synchronization instructions. The SWP instruction produces
+       locked read and write accesses which are atomic, i.e. another operation cannot
+       be done between these locked accesses which ties up external bus (AHB,AXI)
+       bandwidth and can increase worst case interrupt latencies. LDREX,STREX are
+       more flexible, other instructions can be done between the LDREX and STREX accesses. 
+   "
+*/
+AO_INLINE AO_TS_t
+AO_test_and_set(volatile AO_TS_t *addr) {
+       
+       AO_TS_t oldval;
+       unsigned long tmp;
+       unsigned long one = 1;
+retry:
+__asm {        
+               ldrex   oldval, [addr]
+               strex   tmp, one, [addr]
+               teq             tmp, #0
+               bne     retry
+         }
+
+       return oldval;
+}
+
+#define AO_HAVE_test_and_set
+
+/* NEC LE-IT: fetch and add for ARMv6 */
+AO_INLINE AO_t
+AO_fetch_and_add(volatile AO_t *p, AO_t incr)
+{
+       unsigned long tmp,tmp2;
+       AO_t result;
+
+retry:
+__asm {
+       ldrex   result, [p]
+       add     tmp, incr, result
+       strex   tmp2, tmp, [p]
+       teq     tmp2, #0
+       bne     retry }
+
+       return result;
+}
+
+#define AO_HAVE_fetch_and_add
+
+/* NEC LE-IT: fetch and add1 for ARMv6 */
+AO_INLINE AO_t
+AO_fetch_and_add1(volatile AO_t *p)
+{
+       unsigned long tmp,tmp2;
+       AO_t result;
+
+retry:
+__asm {
+       ldrex   result, [p]
+       add     tmp, result, #1
+       strex   tmp2, tmp, [p]
+       teq             tmp2, #0
+       bne     retry
+       }
+
+       return result;
+}
+
+#define AO_HAVE_fetch_and_add1
+
+/* NEC LE-IT: fetch and sub1 for ARMv6 */
+AO_INLINE AO_t
+AO_fetch_and_sub1(volatile AO_t *p)
+{
+       unsigned long tmp,tmp2;
+       AO_t result;
+
+retry:
+__asm {
+       ldrex   result, [p]
+       sub     tmp, result, #1
+       strex   tmp2, tmp, [p]
+       teq             tmp2, #0
+       bne     retry
+       }
+
+       return result;
+}
+
+#define AO_HAVE_fetch_and_sub1
+
+/* NEC LE-IT: compare and swap */
+/* Returns nonzero if the comparison succeeded. */
+AO_INLINE int
+AO_compare_and_swap(volatile AO_t *addr,
+                               AO_t old_val, AO_t new_val) 
+{
+        AO_t result,tmp;
+
+retry:
+__asm {
+       ldrex   tmp, [addr]
+       mov             result, #2
+       teq             tmp, old_val
+       strexeq result, new_val, [addr]
+       teq             result, #1
+       beq             retry
+       }
+       
+       return (result^2)>>1;  /* result: 0 on success, 2 on failed compare; map to 1/0 */
+}
+#define AO_HAVE_compare_and_swap
+
+#endif // __TARGET_ARCH_ARM
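For orientation, a sketch of how client code would exercise these armcc
primitives (not part of the commit; "hit_count" and the function names are
invented). Each call expands to one of the LDREX/STREX retry loops above:

#include "atomic_ops.h"

static volatile AO_t hit_count = 0;

AO_t record_hit(void)
{
  /* atomically increments the counter and returns the old value */
  return AO_fetch_and_add1(&hit_count);
}

int try_swap(volatile AO_t *p, AO_t expected, AO_t desired)
{
  /* nonzero iff *p still held 'expected' and was replaced */
  return AO_compare_and_swap(p, expected, desired);
}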
index d1a323216db1c6b7538f9d25b0589f9f664217d3..c502d4d4405367eb9ec917bb485924a2b97cb34f 100644 (file)
  *
  */
 
-/* There exist multiprocessor SoC ARM processors, so this matters.     */
-/* This needs to be augmented for later ARM (e.g. V7) procesors.       */
+#include "../read_ordered.h"
+
+#include "../test_and_set_t_is_ao_t.h" /* Probably suboptimal */
+
+/* NEC LE-IT: ARMv6 is the first architecture providing support for simple LL/SC.
+ * A data memory barrier must be raised via a CP15 command (see documentation).
+ *
+ * ARMv7 is compatible with ARMv6 but has a simpler instruction for issuing a
+ * memory barrier (DMB). Raising it via CP15 should still work, according to the
+ * support engineers. If it turns out to be much quicker, we should implement
+ * custom code for ARMv7 using the asm { dmb } instruction.
+ *
+ * If only a single processor is used, we can define AO_UNIPROCESSOR
+ * and need not access CP15 to issue a DMB at all.
+ */
+
+/* NEC LE-IT: gcc has no easy way to check the ARM architecture version,
+ * but it defines exactly one of the __ARM_ARCH_x__ macros.            */
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_7__)  
+AO_INLINE void
+AO_nop_full()
+{
+#ifndef AO_UNIPROCESSOR
+       /* issue an data memory barrier (keeps ordering of memory transactions  */
+       /* before and after this operation)                                     */
+       unsigned int dest=0;
+       __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory");
+#endif
+}
+
+#define AO_HAVE_nop_full
+
+/* NEC LE-IT: an AO_t load is a plain read */
+AO_INLINE AO_t
+AO_load(volatile AO_t *addr)
+{
+  /* Cast away the volatile for architectures like IA64 where  */
+  /* volatile adds barrier semantics.                          */
+  return (*(AO_t *)addr);
+}
+#define AO_HAVE_load
+
+/* NEC LE-IT: atomic "store" - according to the ARM documentation this is
+ * the only safe way to set variables that are also used in an LL/SC environment.
+ * A direct write won't be recognized by the LL/SC construct on the _same_ CPU.
+ * The support engineers' response on the behaviour of ARMv6:
+ * 
+   Core1        Core2          SUCCESS
+   ===================================
+   LDREX(x)
+   STREX(x)                    Yes
+   -----------------------------------
+   LDREX(x)
+                STR(x)
+   STREX(x)                    No
+   -----------------------------------
+   LDREX(x)
+   STR(x)
+   STREX(x)                    Yes
+   -----------------------------------
+   
+ * ARMv7 behaves similarly; see the Cortex-A8 TRM, section 8.5.
+ *
+ * HB: I think this is only a problem if interrupt handlers do not clear
+ * the reservation, as they almost certainly should.  Probably change this back
+ * in a while?
+*/
+AO_INLINE void AO_store(volatile AO_t *addr, AO_t value)
+{
+       unsigned long tmp;
+
+       __asm__ __volatile__("@AO_store\n"
+"1:    ldrex   %0, [%1]\n"
+"      strex   %0, %2, [%1]\n"
+"      teq     %0, #0\n"
+"      bne     1b"
+       : "=&r"(tmp)
+       : "r" (addr), "r"(value)
+       : "cc","memory");
+}
+#define AO_HAVE_store
+
+/* NEC LE-IT: replace the SWAP as recommended by ARM:
+
+   "Applies to: ARM11 Cores
+       Though the SWP instruction will still work with ARM V6 cores, it is
+       recommended to use the new V6 synchronization instructions. The SWP
+       instruction produces ‘locked’ read and write accesses which are atomic,
+       i.e. another operation cannot be done between these locked accesses which
+       ties up external bus (AHB,AXI) bandwidth and can increase worst case 
+       interrupt latencies. LDREX,STREX are more flexible, other instructions can
+       be done between the LDREX and STREX accesses. 
+   "
+*/
+AO_INLINE AO_TS_t
+AO_test_and_set(volatile AO_TS_t *addr) {
+       
+       AO_TS_t oldval;
+       unsigned long tmp;
+
+       __asm__ __volatile__("@AO_test_and_set\n"
+"1:    ldrex   %0, [%2]\n"
+"      strex   %1, %3, [%2]\n"
+"      teq     %1, #0\n"
+"      bne     1b\n"
+       : "=&r"(oldval),"=&r"(tmp)
+       : "r"(addr), "r"(1)
+       : "memory","cc");
+
+       return oldval;
+}
+
+#define AO_HAVE_test_and_set
+
+/* NEC LE-IT: fetch and add for ARMv6 */
+AO_INLINE AO_t
+AO_fetch_and_add(volatile AO_t *p, AO_t incr)
+{
+       unsigned long tmp,tmp2;
+       AO_t result;
+
+       __asm__ __volatile__("@AO_fetch_and_add\n"
+"1:    ldrex   %0, [%4]\n"                     /* get original                   */
+"      add     %2, %3, %0\n"           /* sum up */
+"      strex   %1, %2, [%4]\n"         /* store them */
+"      teq     %1, #0\n"
+"      bne     1b\n"
+       : "=&r"(result),"=&r"(tmp),"=&r"(tmp2)
+       : "r"(incr), "r"(p)
+       : "cc","memory");
+
+       return result;
+}
+
+#define AO_HAVE_fetch_and_add
+
+/* NEC LE-IT: fetch and add1 for ARMv6 */
+AO_INLINE AO_t
+AO_fetch_and_add1(volatile AO_t *p)
+{
+       unsigned long tmp,tmp2;
+       AO_t result;
+
+       __asm__ __volatile__("@AO_fetch_and_add1\n"
+"1:    ldrex   %0, [%3]\n"                     /* get original   */
+"      add     %1, %0, #1\n"           /* increment */
+"      strex   %2, %1, [%3]\n"         /* store them */
+"      teq     %2, #0\n"
+"      bne     1b\n"
+       : "=&r"(result), "=&r"(tmp), "=&r"(tmp2)
+       : "r"(p)
+       : "cc","memory");
+
+       return result;
+}
+
+#define AO_HAVE_fetch_and_add1
+
+/* NEC LE-IT: fetch and sub for ARMv6 */
+AO_INLINE AO_t
+AO_fetch_and_sub1(volatile AO_t *p)
+{
+       unsigned long tmp,tmp2;
+       AO_t result;
+
+       __asm__ __volatile__("@ AO_fetch_and_sub1\n"
+"1:    ldrex   %0, [%3]\n"                     /* get original   */
+"      sub     %1, %0, #1\n"           /* increment */
+"      strex   %2, %1, [%3]\n"         /* store them */
+"      teq     %2, #0\n"
+"      bne     1b\n"
+       : "=&r"(result), "=&r"(tmp), "=&r"(tmp2)
+       : "r"(p)
+       : "cc","memory");
+
+       return result;
+}
+
+#define AO_HAVE_fetch_and_sub1
 
+/* NEC LE-IT: compare and swap */
+/* Returns nonzero if the comparison succeeded. */
+AO_INLINE int
+AO_compare_and_swap(volatile AO_t *addr,
+                               AO_t old_val, AO_t new_val) 
+{
+        AO_t result,tmp;
+
+       __asm__ __volatile__("@ AO_compare_and_swap\n"
+"1:    ldrex   %1, [%2]\n"                     /* get original */
+"      mov             %0, #2\n"                       /* store a flag */
+"      teq             %1, %3\n"                       /* see if match */
+"      strexeq %0, %4, [%2]\n"         /* store new one if matched */
+"      teq             %0, #1\n"
+"      beq             1b\n"                           /* if update failed, repeat */
+"      eor             %0, %0, #2\n"           /* if succeded, return 2, else 0 */
+       : "=&r"(result), "=&r"(tmp)
+       : "r"(addr), "r"(old_val), "r"(new_val)
+       : "cc","memory");
+
+       return (result>>1);
+}
+#define AO_HAVE_compare_and_swap
+
+#else
+/* pre-ARMv6 architectures ... */
 /* I found a slide set that, if I read it correctly, claims that       */
 /* Loads followed by either a Load or Store are ordered, but nothing   */
 /* else is.                                                            */
 /* It appears that SWP is the only simple memory barrier.              */
 #include "../all_atomic_load_store.h"
 
-#include "../read_ordered.h"
-
-#include "../test_and_set_t_is_ao_t.h" /* Probably suboptimal */
-
-
 AO_INLINE AO_TS_VAL_t
 AO_test_and_set_full(volatile AO_TS_t *addr) {
   AO_TS_VAL_t oldval;
@@ -48,5 +247,4 @@ AO_test_and_set_full(volatile AO_TS_t *addr) {
 
 #define AO_HAVE_test_and_set_full
 
-
-
+#endif // __ARM_ARCH_x
index 75c2448cf1a2846dcad370b3d9c2032a7bede39d..91e99e62040394e7fa122f55fc1e3c579c8530e0 100644 (file)
@@ -34,6 +34,8 @@
 
 #include "../test_and_set_t_is_char.h"
 
+#include "../standard_ao_double_t.h"
+
 #if defined(AO_USE_PENTIUM4_INSTRS)
 AO_INLINE void
 AO_nop_full()
@@ -133,7 +135,7 @@ AO_test_and_set_full(volatile AO_TS_t *addr)
 /* Returns nonzero if the comparison succeeded. */
 AO_INLINE int
 AO_compare_and_swap_full(volatile AO_t *addr,
-                            AO_t old, AO_t new_val) 
+                        AO_t old, AO_t new_val) 
 {
   char result;
   __asm__ __volatile__("lock; cmpxchgq %3, %0; setz %1"
@@ -144,4 +146,50 @@ AO_compare_and_swap_full(volatile AO_t *addr,
 
 #define AO_HAVE_compare_and_swap_full
 
-/* FIXME: The Intel version has a 16byte CAS instruction.      */
+#ifdef AO_CMPXCHG16B_AVAILABLE
+/* NEC LE-IT: older AMD Opterons are missing this instruction.
+ * On these machines SIGILL will be thrown. Define AO_CASDOUBLE_MISSING
+ * to have an emulated (lock-based) version available. */
+/* HB: Changed this to define neither by default.  There are
+ * enough machines and tool chains around on which cmpxchg16b
+ * doesn't work.  And the emulation is unsafe by our usual rules.
+ * However, both are clearly useful in certain cases.
+ */
+AO_INLINE int
+AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
+                                      AO_t old_val1, AO_t old_val2,
+                                      AO_t new_val1, AO_t new_val2)
+{
+  char result;
+  /* cmpxchg16b compares rdx:rax (high:low) with *addr, so the first   */
+  /* word (AO_val1) belongs in rax/rbx and the second in rdx/rcx.      */
+  __asm__ __volatile__("lock; cmpxchg16b %0; setz %1"
+                               : "=m"(*addr), "=q"(result)
+                                       : "m"(*addr),
+                                         "d" (old_val2),
+                                         "a" (old_val1),
+                                         "c" (new_val2),
+                                         "b" (new_val1)  : "memory");
+  return (int) result;
+}
+#define AO_HAVE_compare_double_and_swap_double_full
+#else
+/* This one provides spinlock-based emulation of double CAS, as        */
+/* implemented in atomic_ops.c.  We probably do not want to do this    */
+/* here, since it is not atomic with respect to other kinds of updates */
+/* of *addr.  On the other hand, this may be a useful facility on      */
+/* occasion.                                                           */
+#ifdef AO_WEAK_DOUBLE_CAS_EMULATION
+int AO_compare_double_and_swap_double_emulation(volatile AO_double_t *addr,
+                                               AO_t old_val1, AO_t old_val2,
+                                               AO_t new_val1, AO_t new_val2);
+
+AO_INLINE int
+AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
+                                      AO_t old_val1, AO_t old_val2,
+                                      AO_t new_val1, AO_t new_val2)
+{
+       return AO_compare_double_and_swap_double_emulation(addr,
+                                                          old_val1, old_val2,
+                                                          new_val1, new_val2);
+}
+#define AO_HAVE_compare_double_and_swap_double_full
+#endif /* AO_WEAK_DOUBLE_CAS_EMULATION */
+#endif /* AO_CMPXCHG16B_AVAILABLE */
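Neither AO_CMPXCHG16B_AVAILABLE nor AO_WEAK_DOUBLE_CAS_EMULATION is defined
by default, so a build must opt in explicitly. Where the decision has to be
made at run time, the CX16 feature flag (CPUID leaf 1, ECX bit 13) reports
whether cmpxchg16b exists; a hedged sketch, not part of this commit:

/* Returns nonzero if this CPU supports cmpxchg16b (CX16 flag,  */
/* CPUID leaf 1, ECX bit 13); false on early AMD Opterons.      */
static int have_cmpxchg16b(void)
{
  unsigned int eax = 1, ecx;
  __asm__ __volatile__("cpuid"
                       : "+a"(eax), "=c"(ecx)
                       : /* no further inputs */
                       : "ebx", "edx");
  return (ecx >> 13) & 1;
}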
index 7f0b647269467dfae9e84b69ebb19a6cd93b78b1..ca1a682e1e64eb7a6cbcbb0e2f464b26586e59a6 100644 (file)
@@ -152,5 +152,35 @@ AO_test_and_set_full(volatile AO_TS_t *addr)
 }
 
 #define AO_HAVE_test_and_set_full
-#endif
+
+FIXME: (__asm not supported)
+NEC LE-IT: I don't have a working Win64 environment here at the moment.
+AO_compare_double_and_swap_double_full needs an implementation for Win64,
+but there is no _InterlockedCompareExchange128 in the Win API, so we
+need basically what is given below.
+Also see gcc/x86_64.h for the partial old-Opteron workaround:
+
+#ifndef AO_CASDOUBLE_MISSING
+
+AO_INLINE int
+AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
+                                      AO_t old_val1, AO_t old_val2,
+                                      AO_t new_val1, AO_t new_val2)
+{
+       char result;
+       __asm
+       {
+               mov     rax,QWORD PTR [old_val1]
+               mov     rdx,QWORD PTR [old_val2]
+               mov     rbx,QWORD PTR [new_val1]
+               mov     rcx,QWORD PTR [new_val2]
+               lock cmpxchg16b [addr]
+               setz result;
+       }
+       return result;
+}
+#endif // AO_CASDOUBLE_MISSING
+#define AO_HAVE_compare_double_and_swap_double_full
+
+#endif /* 0 */
 
index 1d08fc7fd390dcbb8cd20a71961be15fd5b5a04b..52701c528c762bfd979182b916ab4a812cdd9b1c 100644 (file)
@@ -1,8 +1,35 @@
+/* NEC LE-IT: For 64-bit OSes we extend the double type to hold two int64s.
+ *
+ * x86-64: __m128 serves as a placeholder which also requires the compiler
+ *         to align it on a 16-byte boundary (as required by cmpxchg16b).
+ * Similar things could be done for 64-bit PowerPC using a VMX data type. */
+
+#if defined(__GNUC__)
+# if defined(__i386__)
+   typedef unsigned long long double_ptr_storage;
+# endif /* __i386__ */
+# if defined(__arm__)
+   typedef unsigned long long double_ptr_storage;
+# endif /* __arm__ */
+# if defined(__x86_64__)
+#  include <xmmintrin.h>
+   typedef __m128      double_ptr_storage;
+# endif /* __x86_64__ */
+#endif
+
+#ifdef _MSC_VER
+# ifdef _WIN64
+   typedef __m128      double_ptr_storage;
+# elif _WIN32
+   typedef unsigned __int64    double_ptr_storage;
+# endif
+#endif
+
 typedef union {
-    unsigned long long AO_whole;
+    double_ptr_storage AO_whole;
     struct {AO_t AO_v1; AO_t AO_v2;} AO_parts;
 } AO_double_t;
+
 #define AO_HAVE_double_t
 #define AO_val1 AO_parts.AO_v1
 #define AO_val2 AO_parts.AO_v2
-
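A quick way to sanity-check the resulting layout: AO_whole must exactly
cover the two halves, or the double-width primitives would compare stray
padding bytes. A sketch using the classic negative-array-size compile-time
assertion (illustrative; the type and function names are invented):

#include "atomic_ops.h"

/* Fails to compile if the union is padded beyond two AO_t's.  */
typedef char AO_double_size_check
    [sizeof(AO_double_t) == 2 * sizeof(AO_t) ? 1 : -1];

void set_pair(AO_double_t *d, AO_t a, AO_t b)
{
  d->AO_val1 = a;   /* AO_val1 expands to AO_parts.AO_v1 */
  d->AO_val2 = b;   /* AO_val2 expands to AO_parts.AO_v2 */
  /* Note: this plain store is NOT atomic; use the double-CAS  */
  /* primitives for objects accessed concurrently.             */
}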