From 4a5496bdd50f6cec5f8eb252665503e5431708d9 Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Tue, 10 Aug 2010 02:23:54 +0000
Subject: [PATCH] Make replicate intrinsics use shufflevector instead of dup builtins, also remove the dup builtins

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@110646 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/BuiltinsX86.def | 3 ---
 lib/Headers/avxintrin.h             | 6 +++---
 test/CodeGen/builtins-x86.c         | 3 ---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index db8968f484..26f4ce4884 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -371,9 +371,6 @@ BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fc", "")
 BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dc", "")
 BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fc", "")
 BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4ic", "")
-BUILTIN(__builtin_ia32_movshdup256, "V8fV8f", "")
-BUILTIN(__builtin_ia32_movsldup256, "V8fV8f", "")
-BUILTIN(__builtin_ia32_movddup256, "V4dV4d", "")
 BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "")
 BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "")
 BUILTIN(__builtin_ia32_sqrtps_nr256, "V8fV8f", "")
diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h
index 95deed4b91..719ca8f912 100644
--- a/lib/Headers/avxintrin.h
+++ b/lib/Headers/avxintrin.h
@@ -577,19 +577,19 @@ _mm256_cvttps_epi32(__m256 a)
 static __inline __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_movehdup_ps(__m256 a)
 {
-  return (__m256)__builtin_ia32_movshdup256((__v8sf)a);
+  return __builtin_shufflevector(a, a, 1, 1, 3, 3, 5, 5, 7, 7);
 }
 
 static __inline __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_moveldup_ps(__m256 a)
 {
-  return (__m256)__builtin_ia32_movsldup256((__v8sf)a);
+  return __builtin_shufflevector(a, a, 0, 0, 2, 2, 4, 4, 6, 6);
 }
 
 static __inline __m256d __attribute__((__always_inline__, __nodebug__))
 _mm256_movedup_pd(__m256d a)
 {
-  return (__m256d)__builtin_ia32_movddup256((__v4df)a);
+  return __builtin_shufflevector(a, a, 0, 0, 2, 2);
 }
 
 /* Unpack and Interleave */
diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c
index 287de997fa..3be7a39ecd 100644
--- a/test/CodeGen/builtins-x86.c
+++ b/test/CodeGen/builtins-x86.c
@@ -434,9 +434,6 @@ void f0() {
   tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x7);
   tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x7);
   tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x7);
-  tmp_V8f = __builtin_ia32_movshdup256(tmp_V8f);
-  tmp_V8f = __builtin_ia32_movsldup256(tmp_V8f);
-  tmp_V4d = __builtin_ia32_movddup256(tmp_V4d);
   tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);
   tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);
   tmp_V8f = __builtin_ia32_sqrtps_nr256(tmp_V8f);
-- 
2.40.0