BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "")
BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "")
BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fi", "")
-BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dc", "")
-BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fc", "")
BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dc", "")
BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fc", "")
BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dc", "")
}
/* Vector shuffle */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
-_mm256_shuffle_pd(__m256d a, __m256d b, const int s)
-{
- return (__m256d)__builtin_ia32_shufpd256((__v4df)a, (__v4df)b, s);
-}
-
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
-_mm256_shuffle_ps(__m256 a, __m256 b, const int s)
-{
- return (__m256)__builtin_ia32_shufps256((__v8sf)a, (__v8sf)b, s);
-}
+#define _mm256_shuffle_ps(a, b, mask) \
+ (__builtin_shufflevector((__v8sf)(a), (__v8sf)(b), \
+ (mask) & 0x3, ((mask) & 0xc) >> 2, \
+ (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \
+ ((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \
+ (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12))
+
+#define _mm256_shuffle_pd(a, b, mask) \
+ (__builtin_shufflevector((__v4df)(a), (__v4df)(b), \
+ (mask) & 0x1, \
+ (((mask) & 0x2) >> 1) + 4, \
+ (((mask) & 0x4) >> 2) + 2, \
+ (((mask) & 0x8) >> 3) + 6))
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
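The rewritten _mm256_shuffle_pd decodes the four immediate bits the same way VSHUFPD does: bits 0 and 2 select from a, bits 1 and 3 select from b, one pair per 128-bit lane, presumably so clang can emit a generic IR shufflevector instead of a target-specific intrinsic. A minimal sketch exercising the macro (not part of the patch; assumes an AVX-enabled build, e.g. clang -mavx shuffle_pd_demo.c):

/* shuffle_pd_demo.c -- hypothetical demo, not part of the patch. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_set_pd(3.0, 2.0, 1.0, 0.0);  /* a[i] == i     */
    __m256d b = _mm256_set_pd(7.0, 6.0, 5.0, 4.0);  /* b[i] == i + 4 */

    /* mask 0x5 = 0b0101: r = { a[1], b[0], a[3], b[2] } */
    __m256d r = _mm256_shuffle_pd(a, b, 0x5);

    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 1 4 3 6 */
    return 0;
}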
tmp_V4d = __builtin_ia32_blendvpd256(tmp_V4d, tmp_V4d, tmp_V4d);
tmp_V8f = __builtin_ia32_blendvps256(tmp_V8f, tmp_V8f, tmp_V8f);
tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
- tmp_V4d = __builtin_ia32_shufpd256(tmp_V4d, tmp_V4d, 0x7);
- tmp_V8f = __builtin_ia32_shufps256(tmp_V8f, tmp_V8f, 0x7);
tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
tmp_V2d = __builtin_ia32_vextractf128_pd256(tmp_V4d, 0x7);
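Likewise for _mm256_shuffle_ps: each two-bit field of the immediate selects one element within a 128-bit lane, the low two fields from a and the high two from b. A hedged usage sketch (again not part of the patch; assumes an AVX-enabled build):

/* shuffle_ps_demo.c -- hypothetical demo, not part of the patch. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0);        /* a[i] == i     */
    __m256 b = _mm256_set_ps(15, 14, 13, 12, 11, 10, 9, 8);  /* b[i] == i + 8 */

    /* mask 0x1b = 0b00011011: per lane r = { a[3], a[2], b[1], b[0] } */
    __m256 r = _mm256_shuffle_ps(a, b, 0x1b);

    float out[8];
    _mm256_storeu_ps(out, r);
    for (int i = 0; i < 8; ++i)
        printf("%g ", out[i]);   /* 3 2 9 8 7 6 13 12 */
    printf("\n");
    return 0;
}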