From: John McCall Date: Wed, 6 Apr 2011 03:37:51 +0000 (+0000) Subject: Implement the AVX cmp builtins as macros instead of static inlines. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d688bc0e927193c1df1586b4000683fce7f38287;p=clang Implement the AVX cmp builtins as macros instead of static inlines. Patch by Syoyo Fujita! Reviewed by Chris Lattner! Checked in by me! git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@128984 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h index 884d31cb89..2eb2f85622 100644 --- a/lib/Headers/avxintrin.h +++ b/lib/Headers/avxintrin.h @@ -385,41 +385,23 @@ _mm256_dp_ps(__m256 a, __m256 b, const int c) #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ -static __inline __m128d __attribute__((__always_inline__, __nodebug__)) -_mm_cmp_pd(__m128d a, __m128d b, const int c) -{ - return (__m128d)__builtin_ia32_cmppd((__v2df)a, (__v2df)b, c); -} +#define _mm_cmp_pd(a, b, c) \ + (__m128d)__builtin_ia32_cmppd((__v2df)(a), (__v2df)(b), (c)) -static __inline __m128 __attribute__((__always_inline__, __nodebug__)) -_mm_cmp_ps(__m128 a, __m128 b, const int c) -{ - return (__m128)__builtin_ia32_cmpps((__v4sf)a, (__v4sf)b, c); -} +#define _mm_cmp_ps(a, b, c) \ + (__m128)__builtin_ia32_cmpps((__v4sf)(a), (__v4sf)(b), (c)) -static __inline __m256d __attribute__((__always_inline__, __nodebug__)) -_mm256_cmp_pd(__m256d a, __m256d b, const int c) -{ - return (__m256d)__builtin_ia32_cmppd256((__v4df)a, (__v4df)b, c); -} +#define _mm256_cmp_pd(a, b, c) \ + (__m256d)__builtin_ia32_cmppd256((__v4df)(a), (__v4df)(b), (c)) -static __inline __m256 __attribute__((__always_inline__, __nodebug__)) -_mm256_cmp_ps(__m256 a, __m256 b, const int c) -{ - return (__m256)__builtin_ia32_cmpps256((__v8sf)a, (__v8sf)b, c); -} +#define _mm256_cmp_ps(a, b, c) \ + (__m256)__builtin_ia32_cmpps256((__v8sf)(a), (__v8sf)(b), 
(c)) -static __inline __m128d __attribute__((__always_inline__, __nodebug__)) -_mm_cmp_sd(__m128d a, __m128d b, const int c) -{ - return (__m128d)__builtin_ia32_cmpsd((__v2df)a, (__v2df)b, c); -} +#define _mm_cmp_sd(a, b, c) \ + (__m128d)__builtin_ia32_cmpsd((__v2df)(a), (__v2df)(b), (c)) -static __inline __m128 __attribute__((__always_inline__, __nodebug__)) -_mm_cmp_ss(__m128 a, __m128 b, const int c) -{ - return (__m128)__builtin_ia32_cmpss((__v4sf)a, (__v4sf)b, c); -} +#define _mm_cmp_ss(a, b, c) \ + (__m128)__builtin_ia32_cmpss((__v4sf)(a), (__v4sf)(b), (c)) /* Vector extract */ static __inline __m128d __attribute__((__always_inline__, __nodebug__)) diff --git a/test/CodeGen/avx-cmp-builtins.c b/test/CodeGen/avx-cmp-builtins.c new file mode 100644 index 0000000000..42b73d38c0 --- /dev/null +++ b/test/CodeGen/avx-cmp-builtins.c @@ -0,0 +1,47 @@ +// RUN: %clang -mavx -c -emit-llvm %s -o - | llvm-dis | FileCheck %s +#include <immintrin.h> + +// +// Test if third argument of cmp_XY function in LLVM IR form has immediate value. 
+// +void test_cmp_ps256() { + __m256 a, b, c; + a = _mm256_cmp_ps(b, c, _CMP_GE_OS); + // CHECK: @test_cmp_ps256 + // CHECK: %0 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %tmp, <8 x float> %tmp1, i8 13) +} + +void test_cmp_pd256() { + __m256d a, b, c; + a = _mm256_cmp_pd(b, c, _CMP_GE_OS); + // CHECK: @test_cmp_pd256 + // CHECK: %0 = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %tmp, <4 x double> %tmp1, i8 13) +} + +void test_cmp_ps() { + __m128 a, b, c; + a = _mm_cmp_ps(b, c, _CMP_GE_OS); + // CHECK: @test_cmp_ps + // CHECK: %cmpps = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %tmp, <4 x float> %tmp1, i8 13) +} + +void test_cmp_pd() { + __m128d a, b, c; + a = _mm_cmp_pd(b, c, _CMP_GE_OS); + // CHECK: @test_cmp_pd + // CHECK: %cmppd = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %tmp, <2 x double> %tmp1, i8 13) +} + +void test_cmp_sd() { + __m128d a, b, c; + a = _mm_cmp_sd(b, c, _CMP_GE_OS); + // CHECK: @test_cmp_sd + // CHECK: %cmpsd = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %tmp, <2 x double> %tmp1, i8 13) +} + +void test_cmp_ss() { + __m128 a, b, c; + a = _mm_cmp_ss(b, c, _CMP_GE_OS); + // CHECK: @test_cmp_ss + // CHECK: %cmpss = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %tmp, <4 x float> %tmp1, i8 13) +}