From 7fc3702694996d7d373e3280812a4172cf451aac Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Wed, 11 Aug 2010 02:14:38 +0000
Subject: [PATCH] Remove 256-bit cast built-ins and make the AVX intrinsic call llvm __builtin_shufflevector with the appropriate arguments

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@110771 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/BuiltinsX86.def |  6 ------
 lib/Headers/avxintrin.h             | 15 +++++++++------
 test/CodeGen/builtins-x86.c         |  6 ------
 3 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 3e54135ab0..5f62c0dd29 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -377,12 +377,6 @@ BUILTIN(__builtin_ia32_rsqrtps_nr256, "V8fV8f", "")
 BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "")
 BUILTIN(__builtin_ia32_roundpd256, "V4dV4di", "")
 BUILTIN(__builtin_ia32_roundps256, "V8fV8fi", "")
-BUILTIN(__builtin_ia32_si256_si, "V8iV4i", "")
-BUILTIN(__builtin_ia32_ps256_ps, "V8fV4f", "")
-BUILTIN(__builtin_ia32_pd256_pd, "V4dV2d", "")
-BUILTIN(__builtin_ia32_si_si256, "V4iV8i", "")
-BUILTIN(__builtin_ia32_ps_ps256, "V4fV8f", "")
-BUILTIN(__builtin_ia32_pd_pd256, "V2dV4d", "")
 BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "")
 BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "")
 BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "")
diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h
index 6c1e32d240..b7bd1787e0 100644
--- a/lib/Headers/avxintrin.h
+++ b/lib/Headers/avxintrin.h
@@ -1122,37 +1122,40 @@ _mm256_castsi256_pd(__m256i in)
 static __inline __m128d __attribute__((__always_inline__, __nodebug__))
 _mm256_castpd256_pd128(__m256d in)
 {
-  return (__m128d)__builtin_ia32_pd_pd256((__v4df)in);
+  return __builtin_shufflevector(in, in, 0, 1);
 }
 
 static __inline __m128 __attribute__((__always_inline__, __nodebug__))
 _mm256_castps256_ps128(__m256 in)
 {
-  return (__m128)__builtin_ia32_ps_ps256((__v8sf)in);
+  return __builtin_shufflevector(in, in, 0, 1, 2, 3);
 }
 
 static __inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm256_castsi256_si128(__m256i in)
 {
-  return (__m128i)__builtin_ia32_si_si256((__v8si)in);
+  return __builtin_shufflevector(in, in, 0, 1);
 }
 
 static __inline __m256d __attribute__((__always_inline__, __nodebug__))
 _mm256_castpd128_pd256(__m128d in)
 {
-  return (__m256d)__builtin_ia32_pd256_pd((__v2df)in);
+  __m128d zero = _mm_setzero_pd();
+  return __builtin_shufflevector(in, zero, 0, 1, 2, 2);
 }
 
 static __inline __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_castps128_ps256(__m128 in)
 {
-  return (__m256)__builtin_ia32_ps256_ps((__v4sf)in);
+  __m128 zero = _mm_setzero_ps();
+  return __builtin_shufflevector(in, zero, 0, 1, 2, 3, 4, 4, 4, 4);
 }
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_castsi128_si256(__m128i in)
 {
-  return (__m256i)__builtin_ia32_si256_si((__v4si)in);
+  __m128i zero = _mm_setzero_si128();
+  return __builtin_shufflevector(in, zero, 0, 1, 2, 2);
 }
 
 #endif /* __AVX__ */
diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c
index b08fc5f9ac..c97379fd06 100644
--- a/test/CodeGen/builtins-x86.c
+++ b/test/CodeGen/builtins-x86.c
@@ -440,12 +440,6 @@ void f0() {
   tmp_V8f = __builtin_ia32_rcpps256(tmp_V8f);
   tmp_V4d = __builtin_ia32_roundpd256(tmp_V4d, tmp_i);
   tmp_V8f = __builtin_ia32_roundps256(tmp_V8f, tmp_i);
-  tmp_V8i = __builtin_ia32_si256_si(tmp_V4i);
-  tmp_V8f = __builtin_ia32_ps256_ps(tmp_V4f);
-  tmp_V4d = __builtin_ia32_pd256_pd(tmp_V2d);
-  tmp_V4i = __builtin_ia32_si_si256(tmp_V8i);
-  tmp_V4f = __builtin_ia32_ps_ps256(tmp_V8f);
-  tmp_V2d = __builtin_ia32_pd_pd256(tmp_V4d);
   tmp_i = __builtin_ia32_vtestzpd(tmp_V2d, tmp_V2d);
   tmp_i = __builtin_ia32_vtestcpd(tmp_V2d, tmp_V2d);
   tmp_i = __builtin_ia32_vtestnzcpd(tmp_V2d, tmp_V2d);
-- 
2.40.0