Adding LLVM front-end support to two intrinsics dealing with bit scan: _bit_scan_forward and _bit_scan_reverse.
Their functionality is as described in Intel intrinsics guide:
https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bit_scan_forward&expand=371,370
https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bit_scan_reverse&expand=371,370
Furthermore, adding clang front-end support to these conversion intrinsics: _mm256_cvtsd_f64, _mm256_cvtsi256_si32 and _mm256_cvtss_f32.
Finally, adding tests to all of the above, as well as to the state reading intrinsics _rdpmc and _rdtsc.
Their functionality is also specified in the Intel intrinsics guide.
Commit on behalf of Omer Paparo Bivas
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@271387
91177308-0d34-0410-b5e6-
96231b3b80d8
BUILTIN(__builtin_ms_va_end, "vc*&", "n")
BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
+// Bit scan
+TARGET_BUILTIN(__builtin_ia32_bit_scan_forward, "ii", "", "")
+TARGET_BUILTIN(__builtin_ia32_bit_scan_reverse, "ii", "", "")
+
// Undefined Values
//
TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "nc", "")
return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
}
+static __inline double __DEFAULT_FN_ATTRS
+_mm256_cvtsd_f64(__m256d __a)
+{
+ return __a[0];
+}
+
+static __inline int __DEFAULT_FN_ATTRS
+_mm256_cvtsi256_si32(__m256i __a)
+{
+ __v8si __b = (__v8si)__a;
+ return __b[0];
+}
+
+static __inline float __DEFAULT_FN_ATTRS
+_mm256_cvtss_f32(__m256 __a)
+{
+ return __a[0];
+}
+
/* Vector replicate */
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_movehdup_ps(__m256 __a)
#define _rdtsc() __rdtsc()
+#define _rdpmc(A) __rdpmc(A)
+
#endif /* __IA32INTRIN_H */
return __builtin_ia32_rdrand32_step(__p);
}
+/* __bit_scan_forward */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_bit_scan_forward(int __A) {
+ return __builtin_ia32_bit_scan_forward(__A);
+}
+
+/* __bit_scan_reverse */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_bit_scan_reverse(int __A) {
+ return __builtin_ia32_bit_scan_reverse(__A);
+}
+
#ifdef __x86_64__
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
return __builtin_ia32_wrgsbase64(__V);
}
+
#endif
#endif /* __FSGSBASE__ */
// CHECK: call void @llvm.x86.avx.vzeroupper()
return _mm256_zeroupper();
}
+
+double test_mm256_cvtsd_f64(__m256d __a)
+{
+ // CHECK-LABEL: @test_mm256_cvtsd_f64
+ // CHECK: extractelement <4 x double> %{{.*}}, i32 0
+ return _mm256_cvtsd_f64(__a);
+}
+
+int test_mm256_cvtsi256_si32(__m256i __a)
+{
+ // CHECK-LABEL: @test_mm256_cvtsi256_si32
+ // CHECK: extractelement <8 x i32> %{{.*}}, i32 0
+ return _mm256_cvtsi256_si32(__a);
+}
+
+float test_mm256_cvtss_f32(__m256 __a)
+{
+ // CHECK-LABEL: @test_mm256_cvtss_f32
+ // CHECK: extractelement <8 x float> %{{.*}}, i32 0
+ return _mm256_cvtss_f32(__a);
+}
--- /dev/null
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+#include <immintrin.h>
+
+int test_bit_scan_forward(int a) {
+ return _bit_scan_forward(a);
+// CHECK: @test_bit_scan_forward
+// CHECK: call i32 @llvm.x86.bit.scan.forward
+}
+
+int test_bit_scan_reverse(int a) {
+ return _bit_scan_reverse(a);
+// CHECK: @test_bit_scan_reverse
+// CHECK: call i32 @llvm.x86.bit.scan.reverse
+}
--- /dev/null
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+unsigned long long test_rdpmc(int a) {
+ return _rdpmc(a);
+// CHECK: @test_rdpmc
+// CHECK: call i64 @llvm.x86.rdpmc
+}
+
+int test_rdtsc() {
+ return _rdtsc();
+// CHECK: @test_rdtsc
+// CHECK: call i64 @llvm.x86.rdtsc
+}