unsigned __int64 __rdtsc(void);
#pragma intrinsic(__rdtsc)
#endif
+// Note:
+// 32-bit CPU cycle counter is light-weighted for most function performance
+// measurement. For large function (CPU time > a couple of seconds), 64-bit
+// counter should be used.
+// 32-bit CPU cycle counter
static INLINE unsigned int
x86_readtsc(void) {
#if defined(__GNUC__) && __GNUC__
#endif
#endif
}
-
+// 64-bit CPU cycle counter
+static INLINE uint64_t
+x86_readtsc64(void) {
+#if defined(__GNUC__) && __GNUC__
+ uint32_t hi, lo;
+ __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)hi << 32) | lo;
+#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+ uint_t hi, lo;
+ asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)hi << 32) | lo;
+#else
+#if ARCH_X86_64
+ return (uint64_t)__rdtsc();
+#else
+ __asm rdtsc;
+#endif
+#endif
+}
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint()\