From d9d57e99793df47a8fc86cde9ecd656411f02777 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 5 Aug 2013 06:17:21 +0000
Subject: [PATCH] Use a shuffle with undef elements instead of inserting 0s in the 128-bit to 256-bit casting intrinsics to improve performance. Thanks to Katya Romanova for identifying this issue.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@187716 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Headers/avxintrin.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h
index f522ac72f2..50454f2653 100644
--- a/lib/Headers/avxintrin.h
+++ b/lib/Headers/avxintrin.h
@@ -1137,22 +1137,19 @@ _mm256_castsi256_si128(__m256i __a)
 static __inline __m256d __attribute__((__always_inline__, __nodebug__))
 _mm256_castpd128_pd256(__m128d __a)
 {
-  __m128d __zero = _mm_setzero_pd();
-  return __builtin_shufflevector(__a, __zero, 0, 1, 2, 2);
+  return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
 }
 
 static __inline __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_castps128_ps256(__m128 __a)
 {
-  __m128 __zero = _mm_setzero_ps();
-  return __builtin_shufflevector(__a, __zero, 0, 1, 2, 3, 4, 4, 4, 4);
+  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
 }
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_castsi128_si256(__m128i __a)
 {
-  __m128i __zero = _mm_setzero_si128();
-  return __builtin_shufflevector(__a, __zero, 0, 1, 2, 2);
+  return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
 }
 
 /* SIMD load ops (unaligned) */
-- 
2.50.1