From: Craig Topper <craig.topper@gmail.com>
Date: Sat, 9 Jul 2016 05:30:41 +0000 (+0000)
Subject: [X86] Use __builtin_ia32_vec_ext_v4hi and __builtin_ia32_vec_set_v4hi to implement... 
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=561ba720accd662d8df55b89b348a23614a4fd54;p=clang

[X86] Use __builtin_ia32_vec_ext_v4hi and __builtin_ia32_vec_set_v4hi to implement pextrw/pinsertw MMX intrinsics instead of trying to use native IR.

Without this we end up generating code that doesn't use mmx registers and probably doesn't work well with other mmx intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274968 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index fc71f90e07..59dee2f046 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -161,6 +161,8 @@ TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "", "sse")
 
 // MMX+SSE2
 TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "", "sse2")
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h
index 27967e0d85..3110e8babf 100644
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -2114,12 +2114,8 @@ _mm_sfence(void)
 ///    2: Bits [47:32] are copied to the destination.
 ///    3: Bits [63:48] are copied to the destination.
 /// \returns A 16-bit integer containing the extracted 16 bits of packed data.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_extract_pi16(__m64 __a, int __n)
-{
-  __v4hi __b = (__v4hi)__a;
-  return (unsigned short)__b[__n & 3];
-}
+#define _mm_extract_pi16(a, n) __extension__ ({ \
+  (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n); })
 
 /// \brief Copies data from the 64-bit vector of [4 x i16] to the destination,
 ///    and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2145,13 +2141,8 @@ _mm_extract_pi16(__m64 __a, int __n)
 ///    bits in operand __a.
 /// \returns A 64-bit integer vector containing the copied packed data from the
 ///    operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_insert_pi16(__m64 __a, int __d, int __n)
-{
-   __v4hi __b = (__v4hi)__a;
-   __b[__n & 3] = __d;
-   return (__m64)__b;
-}
+#define _mm_insert_pi16(a, d, n) __extension__ ({ \
+  (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n); })
 
 /// \brief Compares each of the corresponding packed 16-bit integer values of
 ///    the 64-bit integer vectors, and writes the greater value to the
diff --git a/test/CodeGen/mmx-builtins.c b/test/CodeGen/mmx-builtins.c
index e2761f4e36..2bf497d58a 100644
--- a/test/CodeGen/mmx-builtins.c
+++ b/test/CodeGen/mmx-builtins.c
@@ -217,6 +217,12 @@ __m64 test_mm_cvttps_pi32(__m128 a) {
   return _mm_cvttps_pi32(a);
 }
 
+int test_mm_extract_pi16(__m64 a) {
+  // CHECK-LABEL: test_mm_extract_pi16
+  // CHECK: call i32 @llvm.x86.mmx.pextr.w
+  return _mm_extract_pi16(a, 2);
+}
+
 __m64 test_m_from_int(int a) {
   // CHECK-LABEL: test_m_from_int
   // CHECK: insertelement <2 x i32>
@@ -265,6 +271,12 @@ __m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
   return _mm_hsubs_pi16(a, b);
 }
 
+__m64 test_mm_insert_pi16(__m64 a, int d) {
+  // CHECK-LABEL: test_mm_insert_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pinsr.w
+  return _mm_insert_pi16(a, d, 2);
+}
+
 __m64 test_mm_madd_pi16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_madd_pi16
   // CHECK: call x86_mmx @llvm.x86.mmx.pmadd.wd