[X86] Simplify the implementation of _mm_sqrt_ss, _mm_rcp_ss, and _mm_rsqrt_ss.

author Craig Topper <craig.topper@intel.com>
Wed, 30 May 2018 18:27:07 +0000 (18:27 +0000)
committer Craig Topper <craig.topper@intel.com>
Wed, 30 May 2018 18:27:07 +0000 (18:27 +0000)
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h

index 75d1c9adb3597bcced06de09b77a85b37a1ee14b..500e1026fa7d9455130f68f010a7b9d20530cef3 100644 (file)
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -224,8 +224,7 @@ _mm_div_ps(__m128 __a, __m128 __b)
  static __inline__ __m128 __DEFAULT_FN_ATTRS
  _mm_sqrt_ss(__m128 __a)
  {
-  __m128 __c = __builtin_ia32_sqrtss((__v4sf)__a);
-  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
+  return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);
  }
  
  /// Calculates the square roots of the values stored in a 128-bit vector
@@ -260,8 +259,7 @@ _mm_sqrt_ps(__m128 __a)
  static __inline__ __m128 __DEFAULT_FN_ATTRS
  _mm_rcp_ss(__m128 __a)
  {
-  __m128 __c = __builtin_ia32_rcpss((__v4sf)__a);
-  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
+  return (__m128)__builtin_ia32_rcpss((__v4sf)__a);
  }
  
  /// Calculates the approximate reciprocals of the values stored in a
@@ -278,7 +276,7 @@ _mm_rcp_ss(__m128 __a)
  static __inline__ __m128 __DEFAULT_FN_ATTRS
  _mm_rcp_ps(__m128 __a)
  {
-  return __builtin_ia32_rcpps((__v4sf)__a);
+  return (__m128)__builtin_ia32_rcpps((__v4sf)__a);
  }
  
  /// Calculates the approximate reciprocal of the square root of the value
@@ -297,8 +295,7 @@ _mm_rcp_ps(__m128 __a)
  static __inline__ __m128 __DEFAULT_FN_ATTRS
  _mm_rsqrt_ss(__m128 __a)
  {
-  __m128 __c = __builtin_ia32_rsqrtss((__v4sf)__a);
-  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
+  return __builtin_ia32_rsqrtss((__v4sf)__a);
  }
  
  /// Calculates the approximate reciprocals of the square roots of the
diff --git a/test/CodeGen/sse-builtins.c b/test/CodeGen/sse-builtins.c

index 7e446ba2b02c15997c1723c77f34b6bf751b4648..18e51349fb5d9ac8e56e8405975bd3374a48a896 100644 (file)
--- a/test/CodeGen/sse-builtins.c
+++ b/test/CodeGen/sse-builtins.c
@@ -508,14 +508,6 @@ __m128 test_mm_rcp_ps(__m128 x) {
  __m128 test_mm_rcp_ss(__m128 x) {
    // CHECK-LABEL: test_mm_rcp_ss
    // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
-  // CHECK: extractelement <4 x float> {{.*}}, i32 0
-  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
-  // CHECK: extractelement <4 x float> {{.*}}, i32 1
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
-  // CHECK: extractelement <4 x float> {{.*}}, i32 2
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
-  // CHECK: extractelement <4 x float> {{.*}}, i32 3
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
    return _mm_rcp_ss(x);
  }
  
@@ -528,14 +520,6 @@ __m128 test_mm_rsqrt_ps(__m128 x) {
  __m128 test_mm_rsqrt_ss(__m128 x) {
    // CHECK-LABEL: test_mm_rsqrt_ss
    // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
-  // CHECK: extractelement <4 x float> {{.*}}, i32 0
-  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
-  // CHECK: extractelement <4 x float> {{.*}}, i32 1
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
-  // CHECK: extractelement <4 x float> {{.*}}, i32 2
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
-  // CHECK: extractelement <4 x float> {{.*}}, i32 3
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
    return _mm_rsqrt_ss(x);
  }
  
@@ -662,14 +646,6 @@ __m128 test_mm_sqrt_ps(__m128 x) {
  __m128 test_sqrt_ss(__m128 x) {
    // CHECK: define {{.*}} @test_sqrt_ss
    // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
-  // CHECK: extractelement <4 x float> {{.*}}, i32 0
-  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
-  // CHECK: extractelement <4 x float> {{.*}}, i32 1
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
-  // CHECK: extractelement <4 x float> {{.*}}, i32 2
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
-  // CHECK: extractelement <4 x float> {{.*}}, i32 3
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
    return _mm_sqrt_ss(x);
  }
author	Craig Topper <craig.topper@intel.com>
	Wed, 30 May 2018 18:27:07 +0000 (18:27 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Wed, 30 May 2018 18:27:07 +0000 (18:27 +0000)
lib/Headers/xmmintrin.h		patch | blob | history
test/CodeGen/sse-builtins.c		patch | blob | history