1 /*-------------------------------------------------------------------------
4 * Hardware-dependent implementation of spinlocks.
6 * NOTE: none of the macros in this file are intended to be called directly.
7 * Call them through the hardware-independent macros in spin.h.
9 * The following hardware-dependent macros must be provided for each
12 * void S_INIT_LOCK(slock_t *lock)
13 * Initialize a spinlock (to the unlocked state).
15 * void S_LOCK(slock_t *lock)
16 * Acquire a spinlock, waiting if necessary.
17 * Time out and abort() if unable to acquire the lock in a
18 * "reasonable" amount of time --- typically ~ 1 minute.
20 * void S_UNLOCK(slock_t *lock)
21 * Unlock a previously acquired lock.
23 * bool S_LOCK_FREE(slock_t *lock)
24 * Tests if the lock is free. Returns TRUE if free, FALSE if locked.
25 * This does *not* change the state of the lock.
27 * void SPIN_DELAY(void)
28 * Delay operation to occur inside spinlock wait loop.
30 * Note to implementors: there are default implementations for all these
31 * macros at the bottom of the file. Check if your platform can use
32 * these or needs to override them.
34 * Usually, S_LOCK() is implemented in terms of an even lower-level macro
37 * int TAS(slock_t *lock)
38 * Atomic test-and-set instruction. Attempt to acquire the lock,
39 * but do *not* wait. Returns 0 if successful, nonzero if unable
40 * to acquire the lock.
42 * TAS() is NOT part of the API, and should never be called directly.
44 * CAUTION: on some platforms TAS() may sometimes report failure to acquire
45 * a lock even when the lock is not locked. For example, on Alpha TAS()
46 * will "fail" if interrupted. Therefore TAS() should always be invoked
47 * in a retry loop, even if you are certain the lock is free.
49 * ANOTHER CAUTION: be sure that TAS() and S_UNLOCK() represent sequence
50 * points, ie, loads and stores of other values must not be moved across
51 * a lock or unlock. In most cases it suffices to make the operation be
52 * done through a "volatile" pointer.
54 * On most supported platforms, TAS() uses a tas() function written
55 * in assembly language to execute a hardware atomic-test-and-set
56 * instruction. Equivalent OS-supplied mutex routines could be used too.
58 * If no system-specific TAS() is available (ie, HAVE_SPINLOCKS is not
59 * defined), then we fall back on an emulation that uses SysV semaphores
60 * (see spin.c). This emulation will be MUCH MUCH slower than a proper TAS()
61 * implementation, because of the cost of a kernel call per lock or unlock.
62 * An old report is that Postgres spends around 40% of its time in semop(2)
63 * when using the SysV semaphore code.
66 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
67 * Portions Copyright (c) 1994, Regents of the University of California
69 * $PostgreSQL: pgsql/src/include/storage/s_lock.h,v 1.153 2006/05/17 23:57:03 momjian Exp $
71 *-------------------------------------------------------------------------
76 #include "storage/pg_sema.h"
78 #ifdef HAVE_SPINLOCKS /* skip spinlocks if requested */
81 #if defined(__GNUC__) || defined(__ICC)
82 /*************************************************************************
84 * Gcc consistently defines the CPU as __cpu__.
85 * Other compilers use __cpu or __cpu__ so we test for both in those cases.
89 * Standard gcc asm format (assuming "volatile slock_t *lock"):
95 : "=r"(_res), "+m"(*lock) // return register, in/out lock value
96 : "r"(lock) // lock pointer, in input register
97 : "memory", "cc"); // show clobbered registers here
99 * The output-operands list (after first colon) should always include
100 * "+m"(*lock), whether or not the asm code actually refers to this
101 * operand directly. This ensures that gcc believes the value in the
102 * lock variable is used and set by the asm code. Also, the clobbers
103 * list (after third colon) should always include "memory"; this prevents
104 * gcc from thinking it can cache the values of shared-memory fields
105 * across the asm code. Add "cc" if your asm code changes the condition
106 * code register, and also list any temp registers the code uses.
111 #ifdef __i386__ /* 32-bit i386 */
112 #define HAS_TEST_AND_SET
114 typedef unsigned char slock_t;
116 #define TAS(lock) tas(lock)
118 static __inline__ int
119 tas(volatile slock_t *lock)
121 register slock_t _res = 1;
124 * Use a non-locking test before asserting the bus lock. Note that the
125 * extra test appears to be a small loss on some x86 platforms and a small
126 * win on others; it's by no means clear that we should keep it.
128 __asm__ __volatile__(
134 : "+q"(_res), "+m"(*lock)
140 #define SPIN_DELAY() spin_delay()
142 static __inline__ void
146 * This sequence is equivalent to the PAUSE instruction ("rep" is
147 * ignored by old IA32 processors if the following instruction is
148 * not a string operation); the IA-32 Architecture Software
149 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
150 * PAUSE in the inner loop of a spin lock is necessary for good
153 * The PAUSE instruction improves the performance of IA-32
154 * processors supporting Hyper-Threading Technology when
155 * executing spin-wait loops and other routines where one
156 * thread is accessing a shared lock or semaphore in a tight
157 * polling loop. When executing a spin-wait loop, the
158 * processor can suffer a severe performance penalty when
159 * exiting the loop because it detects a possible memory order
160 * violation and flushes the core processor's pipeline. The
161 * PAUSE instruction provides a hint to the processor that the
162 * code sequence is a spin-wait loop. The processor uses this
163 * hint to avoid the memory order violation and prevent the
164 * pipeline flush. In addition, the PAUSE instruction
165 * de-pipelines the spin-wait loop to prevent it from
166 * consuming execution resources excessively.
168 __asm__ __volatile__(
172 #endif /* __i386__ */
175 #ifdef __x86_64__ /* AMD Opteron, Intel EM64T */
176 #define HAS_TEST_AND_SET
178 typedef unsigned char slock_t;
180 #define TAS(lock) tas(lock)
182 static __inline__ int
183 tas(volatile slock_t *lock)
185 register slock_t _res = 1;
188 * On Opteron, using a non-locking test before the locking instruction
189 * is a huge loss. On EM64T, it appears to be a wash or small loss,
190 * so we needn't bother to try to distinguish the sub-architectures.
192 __asm__ __volatile__(
195 : "+q"(_res), "+m"(*lock)
201 #define SPIN_DELAY() spin_delay()
203 static __inline__ void
207 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
208 * Opteron, but it may be of some use on EM64T, so we keep it.
210 __asm__ __volatile__(
214 #endif /* __x86_64__ */
217 #if defined(__ia64__) || defined(__ia64) /* Intel Itanium */
218 #define HAS_TEST_AND_SET
220 typedef unsigned int slock_t;
222 #define TAS(lock) tas(lock)
224 #ifndef __INTEL_COMPILER
226 static __inline__ int
227 tas(volatile slock_t *lock)
231 __asm__ __volatile__(
233 : "=r"(ret), "+m"(*lock)
239 #else /* __INTEL_COMPILER */
241 static __inline__ int
242 tas(volatile slock_t *lock)
246 ret = _InterlockedExchange(lock,1); /* this is a xchg asm macro */
251 #endif /* __INTEL_COMPILER */
252 #endif /* __ia64__ || __ia64 */
255 #if defined(__arm__) || defined(__arm)
256 #define HAS_TEST_AND_SET
258 typedef unsigned char slock_t;
260 #define TAS(lock) tas(lock)
262 static __inline__ int
263 tas(volatile slock_t *lock)
265 register slock_t _res = 1;
267 __asm__ __volatile__(
268 " swpb %0, %0, [%2] \n"
269 : "+r"(_res), "+m"(*lock)
278 /* S/390 and S/390x Linux (32- and 64-bit zSeries) */
279 #if defined(__s390__) || defined(__s390x__)
280 #define HAS_TEST_AND_SET
282 typedef unsigned int slock_t;
284 #define TAS(lock) tas(lock)
286 static __inline__ int
287 tas(volatile slock_t *lock)
291 __asm__ __volatile__(
293 : "+d"(_res), "+m"(*lock)
299 #endif /* __s390__ || __s390x__ */
302 #if defined(__sparc__) /* Sparc */
303 #define HAS_TEST_AND_SET
305 typedef unsigned char slock_t;
307 #define TAS(lock) tas(lock)
309 static __inline__ int
310 tas(volatile slock_t *lock)
312 register slock_t _res;
315 * See comment in /pg/backend/port/tas/solaris_sparc.s for why this
316 * code uses "ldstub" while that file uses "cas".
318 __asm__ __volatile__(
319 " ldstub [%2], %0 \n"
320 : "=r"(_res), "+m"(*lock)
326 #endif /* __sparc__ */
330 #if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
331 #define HAS_TEST_AND_SET
333 #if defined(__ppc64__) || defined(__powerpc64__)
334 typedef unsigned long slock_t;
336 typedef unsigned int slock_t;
339 #define TAS(lock) tas(lock)
341 * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
342 * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
344 static __inline__ int
345 tas(volatile slock_t *lock)
350 __asm__ __volatile__(
364 : "=&r"(_t), "=r"(_res), "+m"(*lock)
370 /* PowerPC S_UNLOCK is almost standard but requires a "sync" instruction */
371 #define S_UNLOCK(lock) \
374 __asm__ __volatile__ (" sync \n"); \
375 *((volatile slock_t *) (lock)) = 0; \
381 /* Linux Motorola 68k */
382 #if (defined(__mc68000__) || defined(__m68k__)) && defined(__linux__)
383 #define HAS_TEST_AND_SET
385 typedef unsigned char slock_t;
387 #define TAS(lock) tas(lock)
389 static __inline__ int
390 tas(volatile slock_t *lock)
394 __asm__ __volatile__(
398 : "=d"(rv), "+m"(*lock)
404 #endif /* (__mc68000__ || __m68k__) && __linux__ */
408 * VAXen -- even multiprocessor ones
409 * (thanks to Tom Ivar Helbekkmo)
412 #define HAS_TEST_AND_SET
414 typedef unsigned char slock_t;
416 #define TAS(lock) tas(lock)
418 static __inline__ int
419 tas(volatile slock_t *lock)
423 __asm__ __volatile__(
425 " bbssi $0, (%2), 1f \n"
428 : "=&r"(_res), "+m"(*lock)
437 #if defined(__ns32k__) /* National Semiconductor 32K */
438 #define HAS_TEST_AND_SET
440 typedef unsigned char slock_t;
442 #define TAS(lock) tas(lock)
444 static __inline__ int
445 tas(volatile slock_t *lock)
449 __asm__ __volatile__(
452 : "=r"(_res), "+m"(*lock)
458 #endif /* __ns32k__ */
461 #if defined(__alpha) || defined(__alpha__) /* Alpha */
463 * Correct multi-processor locking methods are explained in section 5.5.3
464 * of the Alpha AXP Architecture Handbook, which at this writing can be
465 * found at ftp://ftp.netbsd.org/pub/NetBSD/misc/dec-docs/index.html.
466 * For gcc we implement the handbook's code directly with inline assembler.
468 #define HAS_TEST_AND_SET
470 typedef unsigned long slock_t;
472 #define TAS(lock) tas(lock)
474 static __inline__ int
475 tas(volatile slock_t *lock)
477 register slock_t _res;
479 __asm__ __volatile__(
491 : "=&r"(_res), "+m"(*lock)
497 #define S_UNLOCK(lock) \
500 __asm__ __volatile__ (" mb \n"); \
501 *((volatile slock_t *) (lock)) = 0; \
504 #endif /* __alpha || __alpha__ */
507 #if defined(__mips__) && !defined(__sgi) /* non-SGI MIPS */
508 /* Note: on SGI we use the OS' mutex ABI, see below */
509 /* Note: R10000 processors require a separate SYNC */
510 #define HAS_TEST_AND_SET
512 typedef unsigned int slock_t;
514 #define TAS(lock) tas(lock)
516 static __inline__ int
517 tas(volatile slock_t *lock)
519 register volatile slock_t *_l = lock;
523 __asm__ __volatile__(
535 : "=&r" (_res), "=&r" (_tmp), "+R" (*_l)
541 /* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
542 #define S_UNLOCK(lock) \
545 __asm__ __volatile__( \
548 " .set noreorder \n" \
552 *((volatile slock_t *) (lock)) = 0; \
555 #endif /* __mips__ && !__sgi */
558 /* These live in s_lock.c, but only for gcc */
561 #if defined(__m68k__) && !defined(__linux__) /* non-Linux Motorola 68k */
562 #define HAS_TEST_AND_SET
564 typedef unsigned char slock_t;
568 #endif /* __GNUC__ */
573 * ---------------------------------------------------------------------
574 * Platforms that use non-gcc inline assembly:
575 * ---------------------------------------------------------------------
578 #if !defined(HAS_TEST_AND_SET) /* We didn't trigger above, let's try here */
581 #if defined(USE_UNIVEL_CC) /* Unixware compiler */
582 #define HAS_TEST_AND_SET
584 typedef unsigned char slock_t;
586 #define TAS(lock) tas(lock)
589 tas(volatile slock_t *s_lock)
591 /* UNIVEL wants %mem in column 1, so we don't pg_indent this file */
601 #endif /* defined(USE_UNIVEL_CC) */
604 #if defined(__alpha) || defined(__alpha__) /* Tru64 Unix Alpha compiler */
606 * The Tru64 compiler doesn't support gcc-style inline asm, but it does
607 * have some builtin functions that accomplish much the same results.
608 * For simplicity, slock_t is defined as long (ie, quadword) on Alpha
609 * regardless of the compiler in use. LOCK_LONG and UNLOCK_LONG only
610 * operate on an int (ie, longword), but that's OK as long as we define
611 * S_INIT_LOCK to zero out the whole quadword.
613 #define HAS_TEST_AND_SET
615 typedef unsigned long slock_t;
617 #include <alpha/builtins.h>
/* Clear the entire slock_t, not only the part the lock builtins operate on. */
618 #define S_INIT_LOCK(lock) (*(lock) = 0)
/*
 * TAS() must be nonzero on failure, so this is true when the builtin yields 0.
 * NOTE(review): the second argument (1) is presumably a retry count -- confirm
 * against the Tru64 compiler documentation.
 */
619 #define TAS(lock) (__LOCK_LONG_RETRY((lock), 1) == 0)
/* Release the lock through the matching compiler builtin. */
620 #define S_UNLOCK(lock) __UNLOCK_LONG(lock)
622 #endif /* __alpha || __alpha__ */
625 #if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */
629 * See src/backend/port/hpux/tas.c.template for details about LDCWX. Because
630 * LDCWX requires a 16-byte-aligned address, we declare slock_t as a 16-byte
631 * struct. The active word in the struct is whichever has the aligned address;
632 * the other three words just sit at -1.
634 * When using gcc, we can inline the required assembly code.
636 #define HAS_TEST_AND_SET
/*
 * Round the lock's address up to the next multiple of 16 and view that
 * location as a volatile int; this is the one word of the 16-byte slock_t
 * that has the alignment LDCWX requires.
 */
643 #define TAS_ACTIVE_WORD(lock) ((volatile int *) (((long) (lock) + 15) & ~15))
645 #if defined(__GNUC__)
647 static __inline__ int
648 tas(volatile slock_t *lock)
650 volatile int *lockword = TAS_ACTIVE_WORD(lock);
651 register int lockval;
653 __asm__ __volatile__(
654 " ldcwx 0(0,%2),%0 \n"
655 : "=r"(lockval), "+m"(*lockword)
658 return (lockval == 0);
661 #endif /* __GNUC__ */
/* Release the lock by storing -1 into the aligned active word. */
663 #define S_UNLOCK(lock) (*TAS_ACTIVE_WORD(lock) = -1)
665 #define S_INIT_LOCK(lock) \
667 volatile slock_t *lock_ = (lock); \
668 lock_->sema[0] = -1; \
669 lock_->sema[1] = -1; \
670 lock_->sema[2] = -1; \
671 lock_->sema[3] = -1; \
/* The lock is free while the active word is nonzero (S_UNLOCK stores -1). */
674 #define S_LOCK_FREE(lock) (*TAS_ACTIVE_WORD(lock) != 0)
676 #endif /* __hppa || __hppa__ */
679 #if defined(__hpux) && defined(__ia64) && !defined(__GNUC__)
681 #define HAS_TEST_AND_SET
683 typedef unsigned int slock_t;
685 #include <ia64/sys/inline.h>
/*
 * NOTE(review): _Asm_xchg presumably exchanges 1 into the lock word and
 * returns the previous contents, so a nonzero result means the lock was
 * already held -- confirm against the HP compiler's <ia64/sys/inline.h>.
 */
686 #define TAS(lock) _Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE)
688 #endif /* HPUX on IA64, non gcc */
691 #if defined(__sgi) /* SGI compiler */
694 * slock_t is defined as an unsigned long. We use the standard SGI
697 * The following comment is left for historical reasons, but what it
698 * proposes is probably not a good idea since the mutex ABI is supported.
700 * This stuff may be supplemented in the future with Masato Kataoka's MIPS-II
701 * assembly from his NECEWS SVR4 port, but we probably ought to retain this
702 * for the R3000 chips out there.
704 #define HAS_TEST_AND_SET
706 typedef unsigned long slock_t;
/*
 * NOTE(review): test_and_set, test_then_and and test_then_add are not
 * declared in the visible part of this file; presumably they are SGI atomic
 * primitives that return the previous value of the word -- confirm.
 */
709 #define TAS(lock) (test_and_set(lock,1))
/* Unlock and init are the same operation here: both pass 0 to test_then_and. */
710 #define S_UNLOCK(lock) (test_then_and(lock,0))
711 #define S_INIT_LOCK(lock) (test_then_and(lock,0))
/* Presumably adding 0 reads the word without changing it; free means 0. */
712 #define S_LOCK_FREE(lock) (test_then_add(lock,0) == 0)
716 #if defined(sinix) /* Sinix */
718 * SINIX / Reliant UNIX
719 * slock_t is defined as a struct abilock_t, which has a single unsigned long
720 * member. (Basically same as SGI)
722 #define HAS_TEST_AND_SET
724 #include "abi_mutex.h"
725 typedef abilock_t slock_t;
/*
 * TAS() must be nonzero on failure, hence the negation.
 * NOTE(review): presumably acquire_lock() returns nonzero on success and the
 * routines below come from abi_mutex.h -- confirm.
 */
727 #define TAS(lock) (!acquire_lock(lock))
/* The remaining operations map one-to-one onto the abilock_t routines. */
728 #define S_UNLOCK(lock) release_lock(lock)
729 #define S_INIT_LOCK(lock) init_lock(lock)
/* Free when the reported lock status equals UNLOCKED. */
730 #define S_LOCK_FREE(lock) (stat_lock(lock) == UNLOCKED)
734 #if defined(_AIX) /* AIX */
738 #define HAS_TEST_AND_SET
740 typedef unsigned int slock_t;
/*
 * NOTE(review): _check_lock(lock, 0, 1) presumably replaces 0 with 1
 * atomically and returns nonzero when the word was not 0, which would match
 * the TAS() convention -- confirm against the AIX documentation.
 */
742 #define TAS(lock) _check_lock(lock, 0, 1)
/* NOTE(review): presumably stores 0 into the lock word -- confirm. */
743 #define S_UNLOCK(lock) _clear_lock(lock, 0)
747 #if defined (nextstep) /* Nextstep */
748 #define HAS_TEST_AND_SET
750 typedef struct mutex slock_t;
/*
 * This port defines S_LOCK() itself on top of the mutex_* routines; no TAS()
 * is defined in this section.
 */
752 #define S_LOCK(lock) mutex_lock(lock)
753 #define S_UNLOCK(lock) mutex_unlock(lock)
754 #define S_INIT_LOCK(lock) mutex_init(lock)
755 /* For Mach, we have to delve inside the entrails of `struct mutex'. Ick! */
/* Free when the mutex's "lock" member is 0; reads the field directly. */
756 #define S_LOCK_FREE(alock) ((alock)->lock == 0)
757 #endif /* nextstep */
760 /* These are in s_lock.c */
763 #if defined(sun3) /* Sun3 */
764 #define HAS_TEST_AND_SET
766 typedef unsigned char slock_t;
770 #if defined(__sun) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
771 #define HAS_TEST_AND_SET
772 typedef unsigned int slock_t;
774 extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
/*
 * Nonzero (failure) when pg_atomic_cas() returns a nonzero value.
 * NOTE(review): the prototype is truncated in this view; presumably the call
 * stores 1 if the lock word equals 0 and returns the old value -- confirm.
 */
777 #define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
781 #endif /* !defined(HAS_TEST_AND_SET) */
784 /* Blow up if we didn't have any way to do spinlocks */
785 #ifndef HAS_TEST_AND_SET
786 #error PostgreSQL does not have native spinlock support on this platform. To continue the compilation, rerun configure using --disable-spinlocks. However, performance will be poor. Please report this to pgsql-bugs@postgresql.org.
790 #else /* !HAVE_SPINLOCKS */
794 * Fake spinlock implementation using semaphores --- slow and prone
795 * to fall foul of kernel limits on number of semaphores, so don't use this
796 * unless you must! The subroutines appear in spin.c.
798 typedef PGSemaphoreData slock_t;
/* Semaphore-based implementations of the spinlock primitives (defined elsewhere). */
800 extern bool s_lock_free_sema(volatile slock_t *lock);
801 extern void s_unlock_sema(volatile slock_t *lock);
802 extern void s_init_lock_sema(volatile slock_t *lock);
803 extern int tas_sema(volatile slock_t *lock);
/* Route each spinlock macro to its semaphore-based counterpart. */
805 #define S_LOCK_FREE(lock) s_lock_free_sema(lock)
806 #define S_UNLOCK(lock) s_unlock_sema(lock)
807 #define S_INIT_LOCK(lock) s_init_lock_sema(lock)
808 #define TAS(lock) tas_sema(lock)
811 #endif /* HAVE_SPINLOCKS */
815 * Default Definitions - override these above as needed.
819 #define S_LOCK(lock) \
822 s_lock((lock), __FILE__, __LINE__); \
/* Each default below takes effect only if the macro is not already defined. */
826 #if !defined(S_LOCK_FREE)
/* Default: the lock is free when the value it points to is 0. */
827 #define S_LOCK_FREE(lock) (*(lock) == 0)
828 #endif /* S_LOCK_FREE */
830 #if !defined(S_UNLOCK)
/* Default: release by storing 0 through a volatile-qualified pointer. */
831 #define S_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0)
832 #endif /* S_UNLOCK */
834 #if !defined(S_INIT_LOCK)
/* Default: initializing a lock is the same operation as unlocking it. */
835 #define S_INIT_LOCK(lock) S_UNLOCK(lock)
836 #endif /* S_INIT_LOCK */
838 #if !defined(SPIN_DELAY)
/* Default: no delay operation; expands to a no-op expression. */
839 #define SPIN_DELAY() ((void) 0)
840 #endif /* SPIN_DELAY */
843 extern int tas(volatile slock_t *lock); /* in port/.../tas.s, or
846 #define TAS(lock) tas(lock)
851 * Platform-independent out-of-line support routines
/* Invoked by the S_LOCK() macro in this file with the caller's __FILE__ and __LINE__. */
853 extern void s_lock(volatile slock_t *lock, const char *file, int line);
855 /* Support for dynamic adjustment of spins_per_delay */
/* NOTE(review): presumably the starting value for spins_per_delay -- confirm in s_lock.c. */
856 #define DEFAULT_SPINS_PER_DELAY 100
/* Both routines take a shared spins_per_delay value; the second returns an int. */
858 extern void set_spins_per_delay(int shared_spins_per_delay);
859 extern int update_spins_per_delay(int shared_spins_per_delay);
861 #endif /* S_LOCK_H */