From d5276cce5527a496bca8e13cbc07f8d1e6044483 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 5 Oct 2013 17:08:42 +0000 Subject: [PATCH] Use logical/arithmetic operations instead of builtins in tbmintrin.h. This way we can remove the intrinsic support from the backend. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@192036 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 18 ----- lib/Headers/tbmintrin.h | 114 ++++++++++++---------------- test/CodeGen/tbm-builtins.c | 64 +++++++++++----- 3 files changed, 93 insertions(+), 103 deletions(-) diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index dea1cc6b16..51397fa45d 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -645,24 +645,6 @@ BUILTIN(__builtin_ia32_pext_di, "ULLiULLiULLi", "") // TBM BUILTIN(__builtin_ia32_bextri_u32, "UiUiIUi", "") BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "") -BUILTIN(__builtin_ia32_blcfill_u32, "UiUi", "") -BUILTIN(__builtin_ia32_blcfill_u64, "ULLiULLi", "") -BUILTIN(__builtin_ia32_blci_u32, "UiUi", "") -BUILTIN(__builtin_ia32_blci_u64, "ULLiULLi", "") -BUILTIN(__builtin_ia32_blcic_u32, "UiUi", "") -BUILTIN(__builtin_ia32_blcic_u64, "ULLiULLi", "") -BUILTIN(__builtin_ia32_blcmsk_u32, "UiUi", "") -BUILTIN(__builtin_ia32_blcmsk_u64, "ULLiULLi", "") -BUILTIN(__builtin_ia32_blcs_u32, "UiUi", "") -BUILTIN(__builtin_ia32_blcs_u64, "ULLiULLi", "") -BUILTIN(__builtin_ia32_blsfill_u32, "UiUi", "") -BUILTIN(__builtin_ia32_blsfill_u64, "ULLiULLi", "") -BUILTIN(__builtin_ia32_blsic_u32, "UiUi", "") -BUILTIN(__builtin_ia32_blsic_u64, "ULLiULLi", "") -BUILTIN(__builtin_ia32_t1mskc_u32, "UiUi", "") -BUILTIN(__builtin_ia32_t1mskc_u64, "ULLiULLi", "") -BUILTIN(__builtin_ia32_tzmsk_u32, "UiUi", "") -BUILTIN(__builtin_ia32_tzmsk_u64, "ULLiULLi", "") // SHA BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "") diff --git a/lib/Headers/tbmintrin.h b/lib/Headers/tbmintrin.h 
index 56e13be9cf..f95e34fbc1 100644 --- a/lib/Headers/tbmintrin.h +++ b/lib/Headers/tbmintrin.h @@ -34,142 +34,124 @@ #define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b))) -#ifdef __x86_64__ -#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b))) -#endif - static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __blcfill_u32(unsigned int a) { - return __builtin_ia32_blcfill_u32(a); + return a & (a + 1); } -#ifdef __x86_64__ -static __inline__ unsigned long long __attribute__((__always_inline__, - __nodebug__)) -__blcfill_u64(unsigned long long a) +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blci_u32(unsigned int a) { - return __builtin_ia32_blcfill_u64(a); + return a | ~(a + 1); } -#endif static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -__blci_u32(unsigned int a) +__blcic_u32(unsigned int a) { - return __builtin_ia32_blci_u32(a); + return ~a & (a + 1); } -#ifdef __x86_64__ -static __inline__ unsigned long long __attribute__((__always_inline__, - __nodebug__)) -__blci_u64(unsigned long long a) +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blcmsk_u32(unsigned int a) { - return __builtin_ia32_blci_u64(a); + return a ^ (a + 1); } -#endif static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -__blcic_u32(unsigned int a) +__blcs_u32(unsigned int a) { - return __builtin_ia32_blcic_u32(a); + return a | (a + 1); } -#ifdef __x86_64__ -static __inline__ unsigned long long __attribute__((__always_inline__, - __nodebug__)) -__blcic_u64(unsigned long long a) +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blsfill_u32(unsigned int a) { - return __builtin_ia32_blcic_u64(a); + return a | (a - 1); } -#endif static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -__blcmsk_u32(unsigned int a) +__blsic_u32(unsigned int a) { - return 
__builtin_ia32_blcmsk_u32(a); + return ~a | (a - 1); } -#ifdef __x86_64__ -static __inline__ unsigned long long __attribute__((__always_inline__, - __nodebug__)) -__blcmsk_u64(unsigned long long a) +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__t1mskc_u32(unsigned int a) { - return __builtin_ia32_blcmsk_u64(a); + return ~a | (a + 1); } -#endif static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -__blcs_u32(unsigned int a) +__tzmsk_u32(unsigned int a) { - return __builtin_ia32_blcs_u32(a); + return ~a & (a - 1); } #ifdef __x86_64__ +#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b))) + static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) -__blcs_u64(unsigned long long a) +__blcfill_u64(unsigned long long a) { - return __builtin_ia32_blcs_u64(a); + return a & (a + 1); } -#endif -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -__blsfill_u32(unsigned int a) +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blci_u64(unsigned long long a) { - return __builtin_ia32_blsfill_u32(a); + return a | ~(a + 1); } -#ifdef __x86_64__ static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) -__blsfill_u64(unsigned long long a) +__blcic_u64(unsigned long long a) { - return __builtin_ia32_blsfill_u64(a); + return ~a & (a + 1); } -#endif -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -__blsic_u32(unsigned int a) +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blcmsk_u64(unsigned long long a) { - return __builtin_ia32_blsic_u32(a); + return a ^ (a + 1); } -#ifdef __x86_64__ static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) -__blsic_u64(unsigned long long a) +__blcs_u64(unsigned long long a) { - return __builtin_ia32_blsic_u64(a); + return a | (a + 1); } -#endif 
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -__t1mskc_u32(unsigned int a) +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blsfill_u64(unsigned long long a) { - return __builtin_ia32_t1mskc_u32(a); + return a | (a - 1); } -#ifdef __x86_64__ static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) -__t1mskc_u64(unsigned long long a) +__blsic_u64(unsigned long long a) { - return __builtin_ia32_t1mskc_u64(a); + return ~a | (a - 1); } -#endif -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -__tzmsk_u32(unsigned int a) +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__t1mskc_u64(unsigned long long a) { - return __builtin_ia32_tzmsk_u32(a); + return ~a | (a + 1); } -#ifdef __x86_64__ static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) __tzmsk_u64(unsigned long long a) { - return __builtin_ia32_tzmsk_u64(a); + return ~a & (a - 1); } #endif diff --git a/test/CodeGen/tbm-builtins.c b/test/CodeGen/tbm-builtins.c index 109c7f897a..e3a702161e 100644 --- a/test/CodeGen/tbm-builtins.c +++ b/test/CodeGen/tbm-builtins.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O3 -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s // Don't include mm_malloc.h, it's system specific. 
#define __MM_MALLOC_H @@ -21,91 +21,117 @@ unsigned long long test__bextri_u64_bigint(unsigned long long a) { } unsigned int test__blcfill_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.blcfill.u32 + // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1 + // CHECK-NEXT: %{{.*}} = and i32 [[TMP]], [[SRC]] return __blcfill_u32(a); } unsigned long long test__blcfill_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.blcfill.u64 + // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1 + // CHECK-NEXT: %{{.*}} = and i64 [[TMP]], [[SRC]] return __blcfill_u64(a); } unsigned int test__blci_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.blci.u32 + // CHECK: [[TMP:%.*]] = sub i32 -2, [[SRC:%.*]] + // CHECK-NEXT: %{{.*}} = or i32 [[TMP]], [[SRC]] return __blci_u32(a); } unsigned long long test__blci_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.blci.u64 + // CHECK: [[TMP:%.*]] = sub i64 -2, [[SRC:%.*]] + // CHECK-NEXT: %{{.*}} = or i64 [[TMP]], [[SRC]] return __blci_u64(a); } unsigned int test__blcic_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.blcic.u32 + // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC]], 1 + // CHECK-NEXT: {{.*}} = and i32 [[TMP2]], [[TMP1]] return __blcic_u32(a); } unsigned long long test__blcic_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.blcic.u64 + // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC]], 1 + // CHECK-NEXT: {{.*}} = and i64 [[TMP2]], [[TMP1]] return __blcic_u64(a); } unsigned int test__blcmsk_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.blcmsk.u32 + // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = xor i32 [[TMP]], [[SRC]] return __blcmsk_u32(a); } unsigned long long test__blcmsk_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.blcmsk.u64 + // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = xor i64 [[TMP]], [[SRC]] 
return __blcmsk_u64(a); } unsigned int test__blcs_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.blcs.u32 + // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = or i32 [[TMP]], [[SRC]] return __blcs_u32(a); } unsigned long long test__blcs_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.blcs.u64 + // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = or i64 [[TMP]], [[SRC]] return __blcs_u64(a); } unsigned int test__blsfill_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.blsfill.u32 + // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = or i32 [[TMP]], [[SRC]] return __blsfill_u32(a); } unsigned long long test__blsfill_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.blsfill.u64 + // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = or i64 [[TMP]], [[SRC]] return __blsfill_u64(a); } unsigned int test__blsic_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.blsic.u32 + // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = or i32 [[TMP2]], [[TMP1]] return __blsic_u32(a); } unsigned long long test__blsic_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.blsic.u64 + // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = or i64 [[TMP2]], [[TMP1]] return __blsic_u64(a); } unsigned int test__t1mskc_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.t1mskc.u32 + // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = or i32 [[TMP2]], [[TMP1]] return __t1mskc_u32(a); } unsigned long long test__t1mskc_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.t1mskc.u64 + // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} 
= or i64 [[TMP2]], [[TMP1]] return __t1mskc_u64(a); } unsigned int test__tzmsk_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.tzmsk.u32 + // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = and i32 [[TMP2]], [[TMP1]] return __tzmsk_u32(a); } unsigned long long test__tzmsk_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.tzmsk.u64 + // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = and i64 [[TMP2]], [[TMP1]] return __tzmsk_u64(a); } -- 2.40.0