granicus.if.org Git - clang/commitdiff
Remove 256-bit cast built-ins and make the AVX intrinsic call llvm __builtin_shufflevector
author: Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Wed, 11 Aug 2010 02:14:38 +0000 (02:14 +0000)
committer: Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Wed, 11 Aug 2010 02:14:38 +0000 (02:14 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@110771 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsX86.def
lib/Headers/avxintrin.h
test/CodeGen/builtins-x86.c

index 3e54135ab0959da27f023dfc8f107716788c394d..5f62c0dd291c0d26c4f37e12c5be0f6f8265a669 100644 (file)
@@ -377,12 +377,6 @@ BUILTIN(__builtin_ia32_rsqrtps_nr256, "V8fV8f", "")
 BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "")
 BUILTIN(__builtin_ia32_roundpd256, "V4dV4di", "")
 BUILTIN(__builtin_ia32_roundps256, "V8fV8fi", "")
-BUILTIN(__builtin_ia32_si256_si, "V8iV4i", "")
-BUILTIN(__builtin_ia32_ps256_ps, "V8fV4f", "")
-BUILTIN(__builtin_ia32_pd256_pd, "V4dV2d", "")
-BUILTIN(__builtin_ia32_si_si256, "V4iV8i", "")
-BUILTIN(__builtin_ia32_ps_ps256, "V4fV8f", "")
-BUILTIN(__builtin_ia32_pd_pd256, "V2dV4d", "")
 BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "")
 BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "")
 BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "")
index 6c1e32d2405cc3cf0ebedc6723ff546502d39175..b7bd1787e0711d9b831c1a997049eaf245177d23 100644 (file)
@@ -1122,37 +1122,40 @@ _mm256_castsi256_pd(__m256i in)
 static __inline __m128d __attribute__((__always_inline__, __nodebug__))
 _mm256_castpd256_pd128(__m256d in)
 {
-  return (__m128d)__builtin_ia32_pd_pd256((__v4df)in);
+  return __builtin_shufflevector(in, in, 0, 1);
 }
 
 static __inline __m128 __attribute__((__always_inline__, __nodebug__))
 _mm256_castps256_ps128(__m256 in)
 {
-  return (__m128)__builtin_ia32_ps_ps256((__v8sf)in);
+  return __builtin_shufflevector(in, in, 0, 1, 2, 3);
 }
 
 static __inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm256_castsi256_si128(__m256i in)
 {
-  return (__m128i)__builtin_ia32_si_si256((__v8si)in);
+  return __builtin_shufflevector(in, in, 0, 1);
 }
 
 static __inline __m256d __attribute__((__always_inline__, __nodebug__))
 _mm256_castpd128_pd256(__m128d in)
 {
-  return (__m256d)__builtin_ia32_pd256_pd((__v2df)in);
+  __m128d zero = _mm_setzero_pd();
+  return __builtin_shufflevector(in, zero, 0, 1, 2, 2);
 }
 
 static __inline __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_castps128_ps256(__m128 in)
 {
-  return (__m256)__builtin_ia32_ps256_ps((__v4sf)in);
+  __m128 zero = _mm_setzero_ps();
+  return __builtin_shufflevector(in, zero, 0, 1, 2, 3, 4, 4, 4, 4);
 }
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_castsi128_si256(__m128i in)
 {
-  return (__m256i)__builtin_ia32_si256_si((__v4si)in);
+  __m128i zero = _mm_setzero_si128();
+  return __builtin_shufflevector(in, zero, 0, 1, 2, 2);
 }
 
 #endif /* __AVX__ */
index b08fc5f9aceeb6c49e910310f82a55ba9f89ab29..c97379fd06eacbdda7d9630b44ba777051e80e8b 100644 (file)
@@ -440,12 +440,6 @@ void f0() {
   tmp_V8f = __builtin_ia32_rcpps256(tmp_V8f);
   tmp_V4d = __builtin_ia32_roundpd256(tmp_V4d, tmp_i);
   tmp_V8f = __builtin_ia32_roundps256(tmp_V8f, tmp_i);
-  tmp_V8i = __builtin_ia32_si256_si(tmp_V4i);
-  tmp_V8f = __builtin_ia32_ps256_ps(tmp_V4f);
-  tmp_V4d = __builtin_ia32_pd256_pd(tmp_V2d);
-  tmp_V4i = __builtin_ia32_si_si256(tmp_V8i);
-  tmp_V4f = __builtin_ia32_ps_ps256(tmp_V8f);
-  tmp_V2d = __builtin_ia32_pd_pd256(tmp_V4d);
   tmp_i = __builtin_ia32_vtestzpd(tmp_V2d, tmp_V2d);
   tmp_i = __builtin_ia32_vtestcpd(tmp_V2d, tmp_V2d);
   tmp_i = __builtin_ia32_vtestnzcpd(tmp_V2d, tmp_V2d);