[X86] Use __builtin_ia32_vec_ext_v4hi and __builtin_ia32_vec_set_v4hi to implement _mm_extract_pi16 and _mm_insert_pi16.

author Craig Topper <craig.topper@gmail.com>

Sat, 9 Jul 2016 05:30:41 +0000 (05:30 +0000)

committer Craig Topper <craig.topper@gmail.com>

Sat, 9 Jul 2016 05:30:41 +0000 (05:30 +0000)
author Craig Topper <craig.topper@gmail.com>
Sat, 9 Jul 2016 05:30:41 +0000 (05:30 +0000)
committer Craig Topper <craig.topper@gmail.com>
Sat, 9 Jul 2016 05:30:41 +0000 (05:30 +0000)
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def

index fc71f90e0740f4c8e76954513f50d49c01f72e56..59dee2f046654999a09352779fdf7edc5181870c 100644 (file)
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -161,6 +161,8 @@ TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "", "sse")
  TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "", "sse")
  TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "", "sse")
  TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "", "sse")
  
  // MMX+SSE2
  TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "", "sse2")
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h

index 27967e0d856cc2053250560d64a01a9105f96f3b..3110e8babf9463af40870cf8286efa18a350e314 100644 (file)
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -2114,12 +2114,8 @@ _mm_sfence(void)
  ///    2: Bits [47:32] are copied to the destination.
  ///    3: Bits [63:48] are copied to the destination.
  /// \returns A 16-bit integer containing the extracted 16 bits of packed data.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_extract_pi16(__m64 __a, int __n)
-{
-  __v4hi __b = (__v4hi)__a;
-  return (unsigned short)__b[__n & 3];
-}
+#define _mm_extract_pi16(a, n) __extension__ ({ /* n: constant lane 0-3 */ \
+  (int)__builtin_ia32_vec_ext_v4hi((__m64)(a), (int)(n)); })
  
  /// \brief Copies data from the 64-bit vector of [4 x i16] to the destination,
  ///    and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2145,13 +2141,8 @@ _mm_extract_pi16(__m64 __a, int __n)
  ///    bits in operand __a.
  /// \returns A 64-bit integer vector containing the copied packed data from the
  ///    operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_insert_pi16(__m64 __a, int __d, int __n)
-{
-   __v4hi __b = (__v4hi)__a;
-   __b[__n & 3] = __d;
-   return (__m64)__b;
-}
+#define _mm_insert_pi16(a, d, n) __extension__ ({ /* n: constant lane 0-3 */ \
+  (__m64)__builtin_ia32_vec_set_v4hi((__m64)(a), (int)(d), (int)(n)); })
  
  /// \brief Compares each of the corresponding packed 16-bit integer values of
  ///    the 64-bit integer vectors, and writes the greater value to the
diff --git a/test/CodeGen/mmx-builtins.c b/test/CodeGen/mmx-builtins.c

index e2761f4e36cf6dcd358ccb4d19e848eab9cddd61..2bf497d58aaf02d64af97858eb1d26f4917925de 100644 (file)
--- a/test/CodeGen/mmx-builtins.c
+++ b/test/CodeGen/mmx-builtins.c
@@ -217,6 +217,12 @@ __m64 test_mm_cvttps_pi32(__m128 a) {
    return _mm_cvttps_pi32(a);
  }
  
+int test_mm_extract_pi16(__m64 a) {
+  // CHECK-LABEL: test_mm_extract_pi16
+  // CHECK: call i32 @llvm.x86.mmx.pextr.w
+  return _mm_extract_pi16(a, 2); // lane 2 selects bits [47:32] of a
+}
+
  __m64 test_m_from_int(int a) {
    // CHECK-LABEL: test_m_from_int
    // CHECK: insertelement <2 x i32>
@@ -265,6 +271,12 @@ __m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
    return _mm_hsubs_pi16(a, b);
  }
  
+__m64 test_mm_insert_pi16(__m64 a, int d) {
+  // CHECK-LABEL: test_mm_insert_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pinsr.w
+  return _mm_insert_pi16(a, d, 2); // insert the low 16 bits of d at lane 2
+}
+
  __m64 test_mm_madd_pi16(__m64 a, __m64 b) {
    // CHECK-LABEL: test_mm_madd_pi16
    // CHECK: call x86_mmx @llvm.x86.mmx.pmadd.wd
author	Craig Topper <craig.topper@gmail.com>
	Sat, 9 Jul 2016 05:30:41 +0000 (05:30 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Sat, 9 Jul 2016 05:30:41 +0000 (05:30 +0000)
include/clang/Basic/BuiltinsX86.def		patch | blob | history
lib/Headers/xmmintrin.h		patch | blob | history
test/CodeGen/mmx-builtins.c		patch | blob | history