From 65a963d4021375aaf6791377a41b5b83b9b20708 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 28 May 2014 20:26:57 +0000
Subject: [PATCH] added Intel's BMI intrinsic variants (fixes PR19431 -
 http://llvm.org/bugs/show_bug.cgi?id=19431)

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@209769 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Headers/bmiintrin.h     | 35 ++++++++++++++-
 test/CodeGen/bmi-builtins.c | 87 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/lib/Headers/bmiintrin.h b/lib/Headers/bmiintrin.h
index 8cb00f51d3..43c4a5e5de 100644
--- a/lib/Headers/bmiintrin.h
+++ b/lib/Headers/bmiintrin.h
@@ -32,6 +32,14 @@
 #ifndef __BMIINTRIN_H
 #define __BMIINTRIN_H
 
+#define _tzcnt_u16(a) (__tzcnt_u16((a)))
+#define _andn_u32(a, b) (__andn_u32((a), (b)))
+/* _bextr_u32 != __bextr_u32 */
+#define _blsi_u32(a) (__blsi_u32((a)))
+#define _blsmsk_u32(a) (__blsmsk_u32((a)))
+#define _blsr_u32(a) (__blsr_u32((a)))
+#define _tzcnt_u32(a) (__tzcnt_u32((a)))
+
 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
 __tzcnt_u16(unsigned short __X)
 {
@@ -44,12 +52,20 @@ __andn_u32(unsigned int __X, unsigned int __Y)
   return ~__X & __Y;
 }
 
+/* AMD-specified, double-leading-underscore version of BEXTR */
 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
 __bextr_u32(unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_bextr_u32(__X, __Y);
 }
 
+/* Intel-specified, single-leading-underscore version of BEXTR */
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
+{
+  return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
+}
+
 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
 __blsi_u32(unsigned int __X)
 {
@@ -75,18 +91,34 @@ __tzcnt_u32(unsigned int __X)
 }
 
 #ifdef __x86_64__
+
+#define _andn_u64(a, b) (__andn_u64((a), (b)))
+/* _bextr_u64 != __bextr_u64 */
+#define _blsi_u64(a) (__blsi_u64((a)))
+#define _blsmsk_u64(a) (__blsmsk_u64((a)))
+#define _blsr_u64(a) (__blsr_u64((a)))
+#define _tzcnt_u64(a) (__tzcnt_u64((a)))
+
 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
 __andn_u64 (unsigned long long __X, unsigned long long __Y)
 {
   return ~__X & __Y;
 }
 
+/* AMD-specified, double-leading-underscore version of BEXTR */
 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
 __bextr_u64(unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_bextr_u64(__X, __Y);
 }
 
+/* Intel-specified, single-leading-underscore version of BEXTR */
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
+{
+  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
+}
+
 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
 __blsi_u64(unsigned long long __X)
 {
@@ -110,6 +142,7 @@ __tzcnt_u64(unsigned long long __X)
 {
   return __builtin_ctzll(__X);
 }
-#endif
+
+#endif /* __x86_64__ */
 
 #endif /* __BMIINTRIN_H */
diff --git a/test/CodeGen/bmi-builtins.c b/test/CodeGen/bmi-builtins.c
index 2e1ba12d9b..92332e3a12 100644
--- a/test/CodeGen/bmi-builtins.c
+++ b/test/CodeGen/bmi-builtins.c
@@ -5,6 +5,14 @@
 
 #include <x86intrin.h>
 
+// The double underscore intrinsics are for compatibility with
+// AMD's BMI interface. The single underscore intrinsics
+// are for compatibility with Intel's BMI interface.
+// Apart from the underscores, the interfaces are identical
+// except in one case: although the 'bextr' register-form
+// instruction is identical in hardware, the AMD and Intel
+// intrinsics are different!
+
 unsigned short test__tzcnt_u16(unsigned short __X) {
   // CHECK: @llvm.cttz.i16
   return __tzcnt_u16(__X);
@@ -39,7 +47,7 @@ unsigned int test__blsr_u32(unsigned int __X) {
   return __blsr_u32(__X);
 }
 
-unsigned int test_tzcnt_u32(unsigned int __X) {
+unsigned int test__tzcnt_u32(unsigned int __X) {
   // CHECK: @llvm.cttz.i32
   return __tzcnt_u32(__X);
 }
@@ -77,3 +85,80 @@ unsigned long long test__tzcnt_u64(unsigned long long __X) {
   // CHECK: @llvm.cttz.i64
   return __tzcnt_u64(__X);
 }
+
+// Intel intrinsics
+
+unsigned short test_tzcnt_u16(unsigned short __X) {
+  // CHECK: @llvm.cttz.i16
+  return _tzcnt_u16(__X);
+}
+
+unsigned int test_andn_u32(unsigned int __X, unsigned int __Y) {
+  // CHECK: [[DEST:%.*]] = xor i32 %{{.*}}, -1
+  // CHECK-NEXT: %{{.*}} = and i32 %{{.*}}, [[DEST]]
+  return _andn_u32(__X, __Y);
+}
+
+unsigned int test_bextr_u32(unsigned int __X, unsigned int __Y,
+                            unsigned int __Z) {
+  // CHECK: @llvm.x86.bmi.bextr.32
+  return _bextr_u32(__X, __Y, __Z);
+}
+
+unsigned int test_blsi_u32(unsigned int __X) {
+  // CHECK: [[DEST:%.*]] = sub i32 0, [[SRC:%.*]]
+  // CHECK-NEXT: %{{.*}} = and i32 [[SRC]], [[DEST]]
+  return _blsi_u32(__X);
+}
+
+unsigned int test_blsmsk_u32(unsigned int __X) {
+  // CHECK: [[DEST:%.*]] = add i32 [[SRC:%.*]], -1
+  // CHECK-NEXT: %{{.*}} = xor i32 [[DEST]], [[SRC]]
+  return _blsmsk_u32(__X);
+}
+
+unsigned int test_blsr_u32(unsigned int __X) {
+  // CHECK: [[DEST:%.*]] = add i32 [[SRC:%.*]], -1
+  // CHECK-NEXT: %{{.*}} = and i32 [[DEST]], [[SRC]]
+  return _blsr_u32(__X);
+}
+
+unsigned int test_tzcnt_u32(unsigned int __X) {
+  // CHECK: @llvm.cttz.i32
+  return _tzcnt_u32(__X);
+}
+
+unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) {
+  // CHECK: [[DEST:%.*]] = xor i64 %{{.*}}, -1
+  // CHECK-NEXT: %{{.*}} = and i64 %{{.*}}, [[DEST]]
+  return _andn_u64(__X, __Y);
+}
+
+unsigned long long test_bextr_u64(unsigned long __X, unsigned int __Y,
+                                  unsigned int __Z) {
+  // CHECK: @llvm.x86.bmi.bextr.64
+  return _bextr_u64(__X, __Y, __Z);
+}
+
+unsigned long long test_blsi_u64(unsigned long long __X) {
+  // CHECK: [[DEST:%.*]] = sub i64 0, [[SRC:%.*]]
+  // CHECK-NEXT: %{{.*}} = and i64 [[SRC]], [[DEST]]
+  return _blsi_u64(__X);
+}
+
+unsigned long long test_blsmsk_u64(unsigned long long __X) {
+  // CHECK: [[DEST:%.*]] = add i64 [[SRC:%.*]], -1
+  // CHECK-NEXT: %{{.*}} = xor i64 [[DEST]], [[SRC]]
+  return _blsmsk_u64(__X);
+}
+
+unsigned long long test_blsr_u64(unsigned long long __X) {
+  // CHECK: [[DEST:%.*]] = add i64 [[SRC:%.*]], -1
+  // CHECK-NEXT: %{{.*}} = and i64 [[DEST]], [[SRC]]
+  return _blsr_u64(__X);
+}
+
+unsigned long long test_tzcnt_u64(unsigned long long __X) {
+  // CHECK: @llvm.cttz.i64
+  return _tzcnt_u64(__X);
+}
-- 
2.40.0
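
The one interface difference the test comment calls out is how BEXTR's control
operand is formed: AMD's __bextr_u32 takes the start bit and field length
pre-packed into a single operand (start in bits 7:0, length in bits 15:8),
while Intel's _bextr_u32 takes them as two separate arguments and, as the
header change above shows, packs them into the same control word before
calling the builtin. A minimal usage sketch (not part of the patch; the file
name and values are made up), assuming a BMI-capable target, e.g. compiled
with clang -mbmi demo.c:

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  unsigned int x = 0xABCD1234;

  /* AMD form: start (4) and length (12) packed into one control operand. */
  unsigned int amd = __bextr_u32(x, 4 | (12 << 8));

  /* Intel form: start and length passed separately; the bmiintrin.h wrapper
     builds the identical control word before invoking the builtin. */
  unsigned int intel = _bextr_u32(x, 4, 12);

  /* Both extract the 12-bit field starting at bit 4: prints 0x123 0x123. */
  printf("%#x %#x\n", amd, intel);
  return 0;
}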