Add comment about double-wide load/store on x86_64 (GCC)

author Ivan Maidanski <ivmai@mail.ru>

Mon, 8 Apr 2013 19:52:44 +0000 (23:52 +0400)

committer Ivan Maidanski <ivmai@mail.ru>

Mon, 8 Apr 2013 19:53:15 +0000 (23:53 +0400)
author Ivan Maidanski <ivmai@mail.ru>
Mon, 8 Apr 2013 19:52:44 +0000 (23:52 +0400)
committer Ivan Maidanski <ivmai@mail.ru>
Mon, 8 Apr 2013 19:53:15 +0000 (23:53 +0400)
diff --git a/src/atomic_ops/sysdeps/gcc/x86.h b/src/atomic_ops/sysdeps/gcc/x86.h

index 9ce3949c95b6802cad838098acfc310b2b4c4209..62b50c9d16aa89a6f999b451d580c900c52fa378 100644 (file)
--- a/src/atomic_ops/sysdeps/gcc/x86.h
+++ b/src/atomic_ops/sysdeps/gcc/x86.h
@@ -282,7 +282,18 @@ AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val,
    }
  # define AO_HAVE_int_fetch_and_add_full
  
-/* TODO: Implement double_load/store. */
+  /* The Intel and AMD Architecture Programmer Manuals state roughly    */
+  /* the following:                                                     */
+  /* - CMPXCHG16B (with a LOCK prefix) can be used to perform 16-byte   */
+  /* atomic accesses in 64-bit mode (with certain alignment             */
+  /* restrictions);                                                     */
+  /* - SSE instructions that access data larger than a quadword (like   */
+  /* MOVDQA) may be implemented using multiple memory accesses;         */
+  /* - the LOCK prefix causes an invalid-opcode exception when used     */
+  /* with 128-bit media (SSE) instructions.                             */
+  /* Thus, currently, the only way to implement lock-free double_load   */
+  /* and double_store on x86_64 is to use CMPXCHG16B (if available).    */
+
  /* TODO: Test some gcc macro to detect presence of cmpxchg16b. */
  
  # ifdef AO_CMPXCHG16B_AVAILABLE
author	Ivan Maidanski <ivmai@mail.ru>
	Mon, 8 Apr 2013 19:52:44 +0000 (23:52 +0400)
committer	Ivan Maidanski <ivmai@mail.ru>
	Mon, 8 Apr 2013 19:53:15 +0000 (23:53 +0400)