From 929954ab7257736bf233613fb0896da78a375ea9 Mon Sep 17 00:00:00 2001
From: hboehm
Date: Wed, 12 Aug 2009 22:33:52 +0000
Subject: [PATCH] 2009-08-12 Hans Boehm (Really Ivan Maidanski) (diff107_cvs)

* src/atomic_ops/sysdeps/sunc/x86.h: New file.
* src/atomic_ops/sysdeps/sunc/x86_64.h: Ditto.
* src/atomic_ops.h (AO_INLINE): Support inlining for DigitalMars,
  Watcom, Sun C.
* src/atomic_ops.h (AO_compiler_barrier): Use intrinsic-based
  implementation for VC++ v8+ (include <intrin.h> before it unless
  WinCE target); use asm-based barrier implementation for Borland,
  DigitalMars and Watcom.
* src/atomic_ops.h: Fix comment (for x86_64).
* src/atomic_ops.h: Include specialized x86.h and x86_64.h arch
  headers for Sun C (if not AO_USE_PTHREAD_DEFS).
* src/atomic_ops.h: Include VC-specific arch headers for Borland,
  DigitalMars and Watcom (Win32 target only).
---
 ChangeLog                            |  17 +++
 src/atomic_ops.h                     |  30 ++++-
 src/atomic_ops/sysdeps/sunc/x86.h    | 171 +++++++++++++++++++++++++
 src/atomic_ops/sysdeps/sunc/x86_64.h | 184 +++++++++++++++++++++++++++
 4 files changed, 397 insertions(+), 5 deletions(-)
 create mode 100644 src/atomic_ops/sysdeps/sunc/x86.h
 create mode 100644 src/atomic_ops/sysdeps/sunc/x86_64.h

diff --git a/ChangeLog b/ChangeLog
index bf85ced..4965e3e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2009-08-12 Hans Boehm (Really Ivan Maidanski)
+	(diff107_cvs)
+
+	* src/atomic_ops/sysdeps/sunc/x86.h: New file.
+	* src/atomic_ops/sysdeps/sunc/x86_64.h: Ditto.
+	* src/atomic_ops.h (AO_INLINE): Support inlining for DigitalMars,
+	Watcom, Sun C.
+	* src/atomic_ops.h (AO_compiler_barrier): Use intrinsic-based
+	implementation for VC++ v8+ (include <intrin.h> before it unless
+	WinCE target); use asm-based barrier implementation for Borland,
+	DigitalMars and Watcom.
+	* src/atomic_ops.h: Fix comment (for x86_64).
+	* src/atomic_ops.h: Include specialized x86.h and x86_64.h arch
+	headers for Sun C (if not AO_USE_PTHREAD_DEFS).
+	* src/atomic_ops.h: Include VC-specific arch headers for Borland,
+	DigitalMars and Watcom (Win32 target only).
+
 2009-05-27 Hans Boehm (Really Ivan Maidanski)
 	(diff87_cvs, resembling diff29, diff68, diff78 partly)
 	* doc/README.txt: Remove outdated info about Windows support.
diff --git a/src/atomic_ops.h b/src/atomic_ops.h
index 04bf39b..006d51d 100755
--- a/src/atomic_ops.h
+++ b/src/atomic_ops.h
@@ -147,16 +147,25 @@
 #define AO_TS_INITIALIZER (AO_t)AO_TS_CLEAR
 
 /* Platform-dependent stuff: */
-#if defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER)
+#if defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER) \
+    || defined(__DMC__) || defined(__WATCOMC__)
 # define AO_INLINE static __inline
+#elif defined(__sun)
+# define AO_INLINE static inline
 #else
 # define AO_INLINE static
 #endif
 
 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
 # define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory")
-#elif defined(_MSC_VER)
-# if defined(_AMD64_)
+#elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
+      || defined(__WATCOMC__)
+# if defined(_AMD64_) || _MSC_VER >= 1400
+#   if defined(_WIN32_WCE)
+/* #     include <cmnintrin.h> */
+#   elif defined(_MSC_VER)
+#     include <intrin.h>
+#   endif
 #   pragma intrinsic(_ReadWriteBarrier)
 #   define AO_compiler_barrier() _ReadWriteBarrier()
        /* We assume this does not generate a fence instruction. */
@@ -195,7 +204,7 @@
 # endif /* __i386__ */
 # if defined(__x86_64__)
 #   include "atomic_ops/sysdeps/gcc/x86_64.h"
-# endif /* __i386__ */
+# endif /* __x86_64__ */
 # if defined(__ia64__)
 #   include "atomic_ops/sysdeps/gcc/ia64.h"
 #   define AO_GENERALIZE_TWICE
@@ -251,13 +260,24 @@
 # endif
 #endif
 
+#if defined(__sun) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
+  /* Note: use -DAO_USE_PTHREAD_DEFS if Sun CC does not handle inline asm. */
+# if defined(__i386)
+#   include "atomic_ops/sysdeps/sunc/x86.h"
+# endif /* __i386 */
+# if defined(__x86_64) || defined(__amd64)
+#   include "atomic_ops/sysdeps/sunc/x86_64.h"
+# endif /* __x86_64 */
+#endif
+
 #if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \
     && !defined(AO_USE_PTHREAD_DEFS)
 #   include "atomic_ops/sysdeps/sunc/sparc.h"
 #   define AO_CAN_EMUL_CAS
 #endif
 
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
+    || (defined(__WATCOMC__) && defined(__NT__))
 # if defined(_AMD64_)
 #   include "atomic_ops/sysdeps/msftc/x86_64.h"
 # elif _M_IX86 >= 400
diff --git a/src/atomic_ops/sysdeps/sunc/x86.h b/src/atomic_ops/sysdeps/sunc/x86.h
new file mode 100644
index 0000000..8cf797a
--- /dev/null
+++ b/src/atomic_ops/sysdeps/sunc/x86.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
+ * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
+ *
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ * Some of the machine specific code was borrowed from our GC distribution.
+ */
+
+/* The following really assume we have a 486 or better. */
+
+#include "../all_aligned_atomic_load_store.h"
+
+/* Real X86 implementations, except for some old WinChips, appear */
+/* to enforce ordering between memory operations, EXCEPT that a later */
+/* read can pass earlier writes, presumably due to the visible */
+/* presence of store buffers. */
+/* We ignore both the WinChips, and the fact that the official specs */
+/* seem to be much weaker (and arguably too weak to be usable). */
+
+#include "../ordered_except_wr.h"
+
+#include "../test_and_set_t_is_char.h"
+
+#include "../standard_ao_double_t.h"
+
+#if defined(AO_USE_PENTIUM4_INSTRS)
+AO_INLINE void
+AO_nop_full(void)
+{
+  __asm__ __volatile__("mfence" : : : "memory");
+}
+
+#define AO_HAVE_nop_full
+
+#else
+
+/* We could use the cpuid instruction. But that seems to be slower */
+/* than the default implementation based on test_and_set_full. Thus */
+/* we omit that bit of misinformation here. */
+
+#endif
+
+/* As far as we can tell, the lfence and sfence instructions are not */
+/* currently needed or useful for cached memory accesses. */
+
+/* Really only works for 486 and later */
+AO_INLINE AO_t
+AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
+{
+  AO_t result;
+
+  __asm__ __volatile__ ("lock; xaddl %0, %1" :
+                        "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */
+                        : "memory");
+  return result;
+}
+
+#define AO_HAVE_fetch_and_add_full
+
+AO_INLINE unsigned char
+AO_char_fetch_and_add_full (volatile unsigned char *p, unsigned char incr)
+{
+  unsigned char result;
+
+  __asm__ __volatile__ ("lock; xaddb %0, %1" :
+                        "=q" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */
+                        : "memory");
+  return result;
+}
+
+#define AO_HAVE_char_fetch_and_add_full
+
+AO_INLINE unsigned short
+AO_short_fetch_and_add_full (volatile unsigned short *p, unsigned short incr)
+{
+  unsigned short result;
+
+  __asm__ __volatile__ ("lock; xaddw %0, %1" :
+                        "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */
+                        : "memory");
+  return result;
+}
+
+#define AO_HAVE_short_fetch_and_add_full
+
+/* Really only works for 486 and later */
+AO_INLINE void
+AO_or_full (volatile AO_t *p, AO_t incr)
+{
+  __asm__ __volatile__ ("lock; orl %1, %0" :
+                        "=m" (*p) : "r" (incr) /* , "m" (*p) */
+                        : "memory");
+}
+
+#define AO_HAVE_or_full
+
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set_full(volatile AO_TS_t *addr)
+{
+  AO_TS_t oldval;
+  /* Note: the "xchg" instruction does not need a "lock" prefix */
+  /* Note 2: "xchgb" is not recognized by Sun CC assembler yet. */
+  __asm__ __volatile__("xchgl %0, %1"
+                       : "=q"(oldval), "=m"(*addr)
+                       : "0"(0xff) /* , "m"(*addr) */
+                       : "memory");
+  return (AO_TS_VAL_t)oldval;
+}
+
+#define AO_HAVE_test_and_set_full
+
+/* Returns nonzero if the comparison succeeded. */
+AO_INLINE int
+AO_compare_and_swap_full(volatile AO_t *addr,
+                         AO_t old, AO_t new_val)
+{
+  char result;
+  __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1"
+                       : "=m"(*addr), "=q"(result)
+                       : "r" (new_val), "a"(old) : "memory");
+  return (int) result;
+}
+
+#define AO_HAVE_compare_and_swap_full
+
+/* Returns nonzero if the comparison succeeded. */
+/* Really requires at least a Pentium. */
+AO_INLINE int
+AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
+                                       AO_t old_val1, AO_t old_val2,
+                                       AO_t new_val1, AO_t new_val2)
+{
+  char result;
+  /* FIXME: not tested */
+  #if __PIC__
+  /* If PIC is turned on, we can't use %ebx as it is reserved for the
+     GOT pointer. We can save and restore %ebx because GCC won't be
+     using it for anything else (such as any of the m operands) */
+  __asm__ __volatile__("pushl %%ebx;"   /* save ebx used for PIC GOT ptr */
+                       "movl %6,%%ebx;" /* move new_val2 to %ebx */
+                       "lock; cmpxchg8b %0; setz %1;"
+                       "pop %%ebx;"     /* restore %ebx */
+                       : "=m"(*addr), "=q"(result)
+                       : "m"(*addr), "d" (old_val2), "a" (old_val1),
+                         "c" (new_val2), "m" (new_val1) : "memory");
+  #else
+  /* We can't just do the same thing in non-PIC mode, because GCC
+   * might be using %ebx as the memory operand. We could have ifdef'd
+   * in a clobber, but there's no point doing the push/pop if we don't
+   * have to. */
+  __asm__ __volatile__("lock; cmpxchg8b %0; setz %1;"
+                       : "=m"(*addr), "=q"(result)
+                       : /* "m"(*addr), */ "d" (old_val2), "a" (old_val1),
+                         "c" (new_val2), "b" (new_val1) : "memory");
+  #endif
+  return (int) result;
+}
+
+#define AO_HAVE_compare_double_and_swap_double_full
+
+#include "../ao_t_is_int.h"
diff --git a/src/atomic_ops/sysdeps/sunc/x86_64.h b/src/atomic_ops/sysdeps/sunc/x86_64.h
new file mode 100644
index 0000000..72dd185
--- /dev/null
+++ b/src/atomic_ops/sysdeps/sunc/x86_64.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
+ * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
+ *
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ * Some of the machine specific code was borrowed from our GC distribution.
+ */
+
+#include "../all_aligned_atomic_load_store.h"
+
+/* Real X86 implementations appear */
+/* to enforce ordering between memory operations, EXCEPT that a later */
+/* read can pass earlier writes, presumably due to the visible */
+/* presence of store buffers. */
+/* We ignore the fact that the official specs */
+/* seem to be much weaker (and arguably too weak to be usable). */
+
+#include "../ordered_except_wr.h"
+
+#include "../test_and_set_t_is_char.h"
+
+#include "../standard_ao_double_t.h"
+
+AO_INLINE void
+AO_nop_full(void)
+{
+  /* Note: "mfence" (SSE2) is supported on all x86_64/amd64 chips. */
+  __asm__ __volatile__("mfence" : : : "memory");
+}
+
+#define AO_HAVE_nop_full
+
+/* As far as we can tell, the lfence and sfence instructions are not */
+/* currently needed or useful for cached memory accesses. */
+
+AO_INLINE AO_t
+AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
+{
+  AO_t result;
+
+  __asm__ __volatile__ ("lock; xaddq %0, %1" :
+                        "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */
+                        : "memory");
+  return result;
+}
+
+#define AO_HAVE_fetch_and_add_full
+
+AO_INLINE unsigned char
+AO_char_fetch_and_add_full (volatile unsigned char *p, unsigned char incr)
+{
+  unsigned char result;
+
+  __asm__ __volatile__ ("lock; xaddb %0, %1" :
+                        "=q" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */
+                        : "memory");
+  return result;
+}
+
+#define AO_HAVE_char_fetch_and_add_full
+
+AO_INLINE unsigned short
+AO_short_fetch_and_add_full (volatile unsigned short *p, unsigned short incr)
+{
+  unsigned short result;
+
+  __asm__ __volatile__ ("lock; xaddw %0, %1" :
+                        "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */
+                        : "memory");
+  return result;
+}
+
+#define AO_HAVE_short_fetch_and_add_full
+
+AO_INLINE unsigned int
+AO_int_fetch_and_add_full (volatile unsigned int *p, unsigned int incr)
+{
+  unsigned int result;
+
+  __asm__ __volatile__ ("lock; xaddl %0, %1" :
+                        "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */
+                        : "memory");
+  return result;
+}
+
+#define AO_HAVE_int_fetch_and_add_full
+
+AO_INLINE void
+AO_or_full (volatile AO_t *p, AO_t incr)
+{
+  __asm__ __volatile__ ("lock; orq %1, %0" :
+                        "=m" (*p) : "r" (incr) /* , "m" (*p) */
+                        : "memory");
+}
+
+#define AO_HAVE_or_full
+
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set_full(volatile AO_TS_t *addr)
+{
+  unsigned int oldval;
+  /* Note: the "xchg" instruction does not need a "lock" prefix */
+  /* Note 2: "xchgb" is not recognized by Sun CC assembler yet. */
+  __asm__ __volatile__("xchgl %0, %1"
+                       : "=q"(oldval), "=m"(*addr)
+                       : "0"(0xff) /* , "m"(*addr) */
+                       : "memory");
+  return (AO_TS_VAL_t)oldval;
+}
+
+#define AO_HAVE_test_and_set_full
+
+/* Returns nonzero if the comparison succeeded. */
+AO_INLINE int
+AO_compare_and_swap_full(volatile AO_t *addr,
+                         AO_t old, AO_t new_val)
+{
+  char result;
+  __asm__ __volatile__("lock; cmpxchgq %2, %0; setz %1"
+                       : "=m"(*addr), "=q"(result)
+                       : "r" (new_val), "a"(old) : "memory");
+  return (int) result;
+}
+
+#define AO_HAVE_compare_and_swap_full
+
+#ifdef AO_CMPXCHG16B_AVAILABLE
+/* NEC LE-IT: older AMD Opterons are missing this instruction.
+ * On these machines SIGILL will be thrown.
+ * Define AO_WEAK_DOUBLE_CAS_EMULATION to have an emulated
+ * (lock based) version available */
+/* HB: Changed this to not define either by default. There are
+ * enough machines and tool chains around on which cmpxchg16b
+ * doesn't work. And the emulation is unsafe by our usual rules.
+ * However both are clearly useful in certain cases.
+ */
+AO_INLINE int
+AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
+                                       AO_t old_val1, AO_t old_val2,
+                                       AO_t new_val1, AO_t new_val2)
+{
+  char result;
+  __asm__ __volatile__("lock; cmpxchg16b %0; setz %1"
+                       : "=m"(*addr), "=q"(result)
+                       : "m"(*addr),
+                         "d" (old_val2),
+                         "a" (old_val1),
+                         "c" (new_val2),
+                         "b" (new_val1) : "memory");
+  return (int) result;
+}
+#define AO_HAVE_compare_double_and_swap_double_full
+#else
+/* this one provides spinlock based emulation of CAS implemented in */
+/* atomic_ops.c. We probably do not want to do this here, since it is */
+/* not atomic with respect to other kinds of updates of *addr. On the */
+/* other hand, this may be a useful facility on occasion. */
+#ifdef AO_WEAK_DOUBLE_CAS_EMULATION
+int AO_compare_double_and_swap_double_emulation(volatile AO_double_t *addr,
+                                                AO_t old_val1, AO_t old_val2,
+                                                AO_t new_val1, AO_t new_val2);
+
+AO_INLINE int
+AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
+                                       AO_t old_val1, AO_t old_val2,
+                                       AO_t new_val1, AO_t new_val2)
+{
+  return AO_compare_double_and_swap_double_emulation(addr,
+                                                     old_val1, old_val2,
+                                                     new_val1, new_val2);
+}
+#define AO_HAVE_compare_double_and_swap_double_full
+#endif /* AO_WEAK_DOUBLE_CAS_EMULATION */
+#endif /* AO_CMPXCHG16B_AVAILABLE */
-- 
2.40.0
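
Usage note: the primitives added by these headers are reached through the
top-level <atomic_ops.h> interface. The short sketch below is not part of the
patch; it shows one common client-side pattern, a test-and-set spinlock built
from AO_test_and_set_full/AO_CLEAR plus a shared counter bumped with
AO_fetch_and_add_full. It assumes libatomic_ops is installed so that
<atomic_ops.h> is on the include path; the helper names spin_lock, spin_unlock
and increment_counter are illustrative only.

#include <atomic_ops.h>

static volatile AO_TS_t lock = AO_TS_INITIALIZER;
static volatile AO_t counter = 0;

/* Acquire: spin until the test-and-set observes the clear state. */
void spin_lock(void)
{
  while (AO_test_and_set_full(&lock) == AO_TS_SET) {
    /* busy-wait; real code would back off or yield here */
  }
}

/* Release: store AO_TS_CLEAR back into the lock word. */
void spin_unlock(void)
{
  AO_CLEAR(&lock);
}

/* Atomic increment; the _full variant acts as a full memory barrier. */
void increment_counter(void)
{
  (void)AO_fetch_and_add_full(&counter, 1);
}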