TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd, "V2dV4i", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtdq2ps, "V4fV4i", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd, "V2dV4f", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "", "avx")
TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd256, "V4dV4i", "", "avx")
TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd256, "V4dV4f", "", "avx")
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx")
TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx")
TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx")
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_cvtepi32_pd(__m128i __a)
{
- return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
+ return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
}
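// __builtin_convertvector performs an element-wise conversion between two
// vector types with the same number of elements: here the signed __v4si lanes
// are widened to __v4df, which lowers to a plain sitofp (the float-source case
// below lowers to fpext the same way), exactly what the updated CHECK lines in
// the CodeGen tests expect.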
/// \brief Converts a vector of [4 x float] into a vector of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_cvtps_pd(__m128 __a)
{
- return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
+ return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
}
static __inline __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtps_pd(__m128 __a)
{
- return __builtin_ia32_cvtps2pd((__v4sf)__a);
+ return (__m128d) __builtin_convertvector(
+ __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtepi32_pd(__m128i __a)
{
- return __builtin_ia32_cvtdq2pd((__v4si)__a);
+ return (__m128d) __builtin_convertvector(
+ __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
}
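// The 128-bit forms only convert the low two source elements, so
// __builtin_shufflevector first extracts lanes 0 and 1 of the four-element
// source and __builtin_convertvector then widens that pair to __v2df; in the
// generated IR this shows up as a shufflevector followed by sitofp or fpext.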
static __inline__ __m128i __DEFAULT_FN_ATTRS
__m256d test_mm256_cvtepi32_pd(__m128i A) {
// CHECK-LABEL: test_mm256_cvtepi32_pd
- // CHECK: call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %{{.*}})
+ // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double>
return _mm256_cvtepi32_pd(A);
}
__m256d test_mm256_cvtps_pd(__m128 A) {
// CHECK-LABEL: test_mm256_cvtps_pd
- // CHECK: call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %{{.*}})
+ // CHECK: fpext <4 x float> %{{.*}} to <4 x double>
return _mm256_cvtps_pd(A);
}
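// With the x86-specific builtins removed, the CodeGen tests now match the
// generic sitofp / fpext IR rather than calls to the corresponding
// llvm.x86.* intrinsics.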
tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);
tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d);
tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d);
- tmp_V2d = __builtin_ia32_cvtdq2pd(tmp_V4i);
tmp_V4f = __builtin_ia32_cvtdq2ps(tmp_V4i);
tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d);
tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d);
tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
#endif
tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
- tmp_V2d = __builtin_ia32_cvtps2pd(tmp_V4f);
tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
(void) __builtin_ia32_clflush(tmp_vCp);
(void) __builtin_ia32_lfence();
tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
- tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i);
tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
- tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f);
tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
__m128d test_mm_cvtepi32_pd(__m128i A) {
// CHECK-LABEL: test_mm_cvtepi32_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %{{.*}})
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
+ // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
return _mm_cvtepi32_pd(A);
}
__m128d test_mm_cvtps_pd(__m128 A) {
// CHECK-LABEL: test_mm_cvtps_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %{{.*}})
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
+ // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
return _mm_cvtps_pd(A);
}
// Since we do code generation at function granularity, this needs to error out
// because the subtarget feature won't be available.
-__m256d wombat(__m128i a) {
+__m128 wombat(__m128i a) {
if (__builtin_cpu_supports("avx"))
- return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}}
+ return __builtin_ia32_vpermilvarps((__v4sf) {0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a); // expected-error {{'__builtin_ia32_vpermilvarps' needs target feature avx}}
else
- return (__m256d){0, 0, 0, 0};
+ return (__m128){0, 0};
}
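// A minimal sketch (hypothetical helper, not in the original test): because
// the feature check is made per function, the same AVX builtin compiles
// cleanly once it is hoisted into a function carrying the "avx" target
// attribute, which is how a __builtin_cpu_supports() runtime dispatch is
// normally structured.
__attribute__((target("avx")))
static __m128 wombat_avx(__m128i a) {
  // Legal here: this function is compiled with the avx feature enabled.
  return __builtin_ia32_vpermilvarps((__v4sf){0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a);
}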