From: hboehm <hboehm>
Date: Fri, 4 Jan 2008 00:45:32 +0000 (+0000)
Subject: 2008-01-03 Hans Boehm <Hans.Boehm@hp.com>
X-Git-Tag: gc7_1alpha2~3
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8bb45851c47e376b6c2ef29cadd56f6ba49f959e;p=libatomic_ops

2008-01-03 Hans Boehm <Hans.Boehm@hp.com>
	(Merge from separate atomic_ops tree)
	* src/atomic_ops/sysdeps/gcc/x86.h: Define correct macro for
	double-width cas, and fix its implementation.
	* doc/README.txt: Clarify use of _full.  Add more warnings about
	data dependencies.
---

diff --git a/ChangeLog b/ChangeLog
index aa079ec..ce3afe9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-01-03 Hans Boehm <Hans.Boehm@hp.com>
+	(Merge from separate atomic_ops tree)
+	* src/atomic_ops/sysdeps/gcc/x86.h: Define correct macro for
+	double-width cas, and fix its implementation.
+	* doc/README.txt: Clarify use of _full.  Add more warnings about
+	data dependencies.
+
 2008-01-02 Hans Boehm <Hans.Boehm@hp.com>
 	* src/atomic_ops/sysdeps/gcc/powerpc.h (AO_load_acquire): Add
 	%X1 modifier to support indexed addressing.
diff --git a/doc/README.txt b/doc/README.txt
index fa8f07e..e7c2f0d 100644
--- a/doc/README.txt
+++ b/doc/README.txt
@@ -154,6 +154,8 @@ _read: Subsequent reads must become visible after reads included in
 _write: Earlier writes become visible before writes during or after
         the atomic operation.  Rarely useful for clients?
 _full: Ordered with respect to both earlier and later memops.
+       AO_store_full or AO_nop_full is the normal way to force a store
+       to be ordered with respect to a later load.
 _release_write: Ordered with respect to earlier writes.  This is
 	        normally implemented as either a _write or _release
 		barrier.
@@ -163,7 +165,11 @@ _dd_acquire_read: Ordered with respect to later reads that are data
 	       second value, with the expectation that the second
 	       read is ordered after the first one.  On most architectures,
 	       this is equivalent to no barrier.  (This is very
-	       hard to define precisely.  It should probably be avoided.)
+	       hard to define precisely.  It should probably be avoided.
+	       A major problem is that optimizers tend to try to
+	       eliminate dependencies from the generated code, since
+	       dependencies force the hardware to execute the code
+	       serially.)
 _release_read: Ordered with respect to earlier reads.  Useful for
 	       implementing read locks.  Can be implemented as _release,
 	       but not as _read, since _read groups the current operation
diff --git a/src/atomic_ops/sysdeps/gcc/x86.h b/src/atomic_ops/sysdeps/gcc/x86.h
index 27e047e..0b344a7 100644
--- a/src/atomic_ops/sysdeps/gcc/x86.h
+++ b/src/atomic_ops/sysdeps/gcc/x86.h
@@ -141,13 +141,17 @@ AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
 			               AO_t new_val1, AO_t new_val2) 
 {
   char result;
+  register AO_t nv1 asm("%ebx") = new_val1;
+  	/* Binding nv1 to ebx via a register variable seems to avoid a	*/
+  	/* gcc error complaining that ebx is unavailable.		*/
+
   __asm__ __volatile__("lock; cmpxchg8b %0; setz %1"
 	    	       : "=m"(*addr), "=q"(result)
-		       : "m"(*addr), "d" (old_val1), "a" (old_val2),
-		         "c" (new_val1), "b" (new_val2) : "memory");
+		       : "m"(*addr), "a" (old_val1), "d" (old_val2),
+		         "b" (nv1), "c" (new_val2) : "memory");
   return (int) result;
 }
 
-#define AO_HAVE_double_compare_and_swap_full
+#define AO_HAVE_compare_double_and_swap_double_full
 
 #include "../ao_t_is_int.h"