// FMA / FMA4 builtin table excerpt. Each TARGET_BUILTIN entry has four arguments; presumably
// (builtin name, encoded type signature, attribute string, required target features) --
// confirm against the macro definition, which is not visible here.
// Lines marked '-' drop the packed vfmsub, vfnmadd, vfnmsub and vfmsubadd builtins; the scalar
// ss3/sd3 entries and the vfmadd/vfmaddsub entries are left in place.
TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmsubss3, "V4fV4fV4fV4f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddps, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfnmaddss3, "V4fV4fV4fV4f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfnmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubps, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfnmsubss3, "V4fV4fV4fV4f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps256, "V8fV8fV8fV8f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd256, "V4dV4dV4dV4d", "", "fma|fma4")
// NOTE(review): the two 256-bit vfnmadd entries below stay declared, although their 128-bit
// counterparts are removed above and the 256-bit nmacc/fnmadd wrappers in this change stop
// calling them. Confirm whether they still have users; if not, they look like leftovers.
TARGET_BUILTIN(__builtin_ia32_vfnmaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfnmaddpd256, "V4dV4dV4dV4d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubps256, "V8fV8fV8fV8f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubpd256, "V4dV4dV4dV4d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd256, "V4dV4dV4dV4d", "", "fma|fma4")
// The two masked entries below require "avx512vl" rather than "fma|fma4".
TARGET_BUILTIN(__builtin_ia32_vfmaddpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_vfmaddpd128_mask3, "V2dV2dV2dV2dUc", "", "avx512vl")
/* _mm_msub_ps: the '-' line is the previous body, a call to __builtin_ia32_vfmsubps;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps with __C negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
/* _mm_msub_pd: the '-' line is the previous body, a call to __builtin_ia32_vfmsubpd;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd with __C negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
/* _mm_nmacc_ps: the '-' line is the previous body, a call to __builtin_ia32_vfnmaddps;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps with __A negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
/* _mm_nmacc_pd: the '-' line is the previous body, a call to __builtin_ia32_vfnmaddpd;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd with __A negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
/* _mm_nmsub_ps: the '-' line is the previous body, a call to __builtin_ia32_vfnmsubps;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps with both __A and __C negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
/* _mm_nmsub_pd: the '-' line is the previous body, a call to __builtin_ia32_vfnmsubpd;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd with both __A and __C negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
/* _mm_msubadd_ps: the '-' line is the previous body, a call to __builtin_ia32_vfmsubaddps;
 * the '+' line is the replacement, __builtin_ia32_vfmaddsubps with __C negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
/* _mm_msubadd_pd: the '-' line is the previous body, a call to __builtin_ia32_vfmsubaddpd;
 * the '+' line is the replacement, __builtin_ia32_vfmaddsubpd with __C negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
/* _mm256_msub_ps: the '-' line is the previous body, a call to __builtin_ia32_vfmsubps256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps256 with __C negated. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+ return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
/* _mm256_msub_pd: the '-' line is the previous body, a call to __builtin_ia32_vfmsubpd256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd256 with __C negated. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+ return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
/* _mm256_nmacc_ps: the '-' line is the previous body, a call to __builtin_ia32_vfnmaddps256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps256 with __A negated. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+ return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
/* _mm256_nmacc_pd: the '-' line is the previous body, a call to __builtin_ia32_vfnmaddpd256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd256 with __A negated. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+ return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
}
/* _mm256_nmsub_ps: the '-' line is the previous body, a call to __builtin_ia32_vfnmsubps256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps256 with both __A and __C negated. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+ return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
/* _mm256_nmsub_pd: the '-' line is the previous body, a call to __builtin_ia32_vfnmsubpd256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd256 with both __A and __C negated. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+ return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
/* _mm256_msubadd_ps: the '-' line is the previous body, a call to __builtin_ia32_vfmsubaddps256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddsubps256 with __C negated. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+ return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
/* _mm256_msubadd_pd: the '-' line is the previous body, a call to __builtin_ia32_vfmsubaddpd256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddsubpd256 with __C negated. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+ return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
#undef __DEFAULT_FN_ATTRS
/* _mm_fmsub_ps: the '-' line is the previous body, a call to __builtin_ia32_vfmsubps;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps with __C negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
/* _mm_fmsub_pd: the '-' line is the previous body, a call to __builtin_ia32_vfmsubpd;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd with __C negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
/* _mm_fnmadd_ps: the '-' line is the previous body, a call to __builtin_ia32_vfnmaddps;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps with __A negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
/* _mm_fnmadd_pd: the '-' line is the previous body, a call to __builtin_ia32_vfnmaddpd;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd with __A negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
/* _mm_fnmsub_ps: the '-' line is the previous body, a call to __builtin_ia32_vfnmsubps;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps with both __A and __C negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
/* _mm_fnmsub_pd: the '-' line is the previous body, a call to __builtin_ia32_vfnmsubpd;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd with both __A and __C negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
/* _mm_fmsubadd_ps: the '-' line is the previous body, a call to __builtin_ia32_vfmsubaddps;
 * the '+' line is the replacement, __builtin_ia32_vfmaddsubps with __C negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
/* _mm_fmsubadd_pd: the '-' line is the previous body, a call to __builtin_ia32_vfmsubaddpd;
 * the '+' line is the replacement, __builtin_ia32_vfmaddsubpd with __C negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
/* _mm256_fmsub_ps: the '-' line is the previous body, a call to __builtin_ia32_vfmsubps256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps256 with __C negated. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+ return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
/* _mm256_fmsub_pd: the '-' line is the previous body, a call to __builtin_ia32_vfmsubpd256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd256 with __C negated. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+ return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
/* _mm256_fnmadd_ps: the '-' line is the previous body, a call to __builtin_ia32_vfnmaddps256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps256 with __A negated. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+ return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
/* _mm256_fnmadd_pd: the '-' line is the previous body, a call to __builtin_ia32_vfnmaddpd256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd256 with __A negated. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+ return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
}
/* _mm256_fnmsub_ps: the '-' line is the previous body, a call to __builtin_ia32_vfnmsubps256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddps256 with both __A and __C negated. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+ return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
/* _mm256_fnmsub_pd: the '-' line is the previous body, a call to __builtin_ia32_vfnmsubpd256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddpd256 with both __A and __C negated. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+ return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
/* _mm256_fmsubadd_ps: the '-' line is the previous body, a call to __builtin_ia32_vfmsubaddps256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddsubps256 with __C negated. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+ return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
/* _mm256_fmsubadd_pd: the '-' line is the previous body, a call to __builtin_ia32_vfmsubaddpd256;
 * the '+' line is the replacement, __builtin_ia32_vfmaddsubpd256 with __C negated. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+ return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
#undef __DEFAULT_FN_ATTRS
#include <immintrin.h>
// Calls _mm_fmadd_ps; the expectation is a reference to @llvm.x86.fma.vfmadd.ps.
// The '+' line adds a label directive scoping that expectation to this function.
__m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) {
+ // CHECK-LABEL: test_mm_fmadd_ps
// CHECK: @llvm.x86.fma.vfmadd.ps
return _mm_fmadd_ps(a, b, c);
}
// Calls _mm_fmadd_pd; the expectation is a reference to @llvm.x86.fma.vfmadd.pd.
// The '+' line adds a label directive scoping that expectation to this function.
__m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) {
+ // CHECK-LABEL: test_mm_fmadd_pd
// CHECK: @llvm.x86.fma.vfmadd.pd
return _mm_fmadd_pd(a, b, c);
}
// Calls _mm_fmadd_ss; the expectation is a reference to @llvm.x86.fma.vfmadd.ss.
// The '+' line adds a label directive scoping that expectation to this function.
__m128 test_mm_fmadd_ss(__m128 a, __m128 b, __m128 c) {
+ // CHECK-LABEL: test_mm_fmadd_ss
// CHECK: @llvm.x86.fma.vfmadd.ss
return _mm_fmadd_ss(a, b, c);
}
// Calls _mm_fmadd_sd; the expectation is a reference to @llvm.x86.fma.vfmadd.sd.
// The '+' line adds a label directive scoping that expectation to this function.
__m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) {
+ // CHECK-LABEL: test_mm_fmadd_sd
// CHECK: @llvm.x86.fma.vfmadd.sd
return _mm_fmadd_sd(a, b, c);
}
// Calls _mm_fmsub_ps. Old expectation ('-'): a vfmsub.ps intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.ps call
// whose third operand is NEG.
__m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
- // CHECK: @llvm.x86.fma.vfmsub.ps
+ // CHECK-LABEL: test_mm_fmsub_ps
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
return _mm_fmsub_ps(a, b, c);
}
// Calls _mm_fmsub_pd. Old expectation ('-'): a vfmsub.pd intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.pd call
// whose third operand is NEG.
__m128d test_mm_fmsub_pd(__m128d a, __m128d b, __m128d c) {
- // CHECK: @llvm.x86.fma.vfmsub.pd
+ // CHECK-LABEL: test_mm_fmsub_pd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
return _mm_fmsub_pd(a, b, c);
}
// Calls _mm_fmsub_ss; the expectation is still a reference to @llvm.x86.fma.vfmsub.ss.
// The '+' line adds a label directive scoping that expectation to this function.
__m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) {
+ // CHECK-LABEL: test_mm_fmsub_ss
// CHECK: @llvm.x86.fma.vfmsub.ss
return _mm_fmsub_ss(a, b, c);
}
// Calls _mm_fmsub_sd; the expectation is still a reference to @llvm.x86.fma.vfmsub.sd.
// The '+' line adds a label directive scoping that expectation to this function.
__m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) {
+ // CHECK-LABEL: test_mm_fmsub_sd
// CHECK: @llvm.x86.fma.vfmsub.sd
return _mm_fmsub_sd(a, b, c);
}
// Calls _mm_fnmadd_ps. Old expectation ('-'): a vfnmadd.ps intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.ps call
// whose first operand is NEG.
__m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
- // CHECK: @llvm.x86.fma.vfnmadd.ps
+ // CHECK-LABEL: test_mm_fnmadd_ps
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}})
return _mm_fnmadd_ps(a, b, c);
}
// Calls _mm_fnmadd_pd. Old expectation ('-'): a vfnmadd.pd intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.pd call
// whose first operand is NEG.
__m128d test_mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) {
- // CHECK: @llvm.x86.fma.vfnmadd.pd
+ // CHECK-LABEL: test_mm_fnmadd_pd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}})
return _mm_fnmadd_pd(a, b, c);
}
// Calls _mm_fnmadd_ss; the expectation is still a reference to @llvm.x86.fma.vfnmadd.ss.
// The '+' line adds a label directive scoping that expectation to this function.
__m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) {
+ // CHECK-LABEL: test_mm_fnmadd_ss
// CHECK: @llvm.x86.fma.vfnmadd.ss
return _mm_fnmadd_ss(a, b, c);
}
// Calls _mm_fnmadd_sd; the expectation is still a reference to @llvm.x86.fma.vfnmadd.sd.
// The '+' line adds a label directive scoping that expectation to this function.
__m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) {
+ // CHECK-LABEL: test_mm_fnmadd_sd
// CHECK: @llvm.x86.fma.vfnmadd.sd
return _mm_fnmadd_sd(a, b, c);
}
// Calls _mm_fnmsub_ps. Old expectation ('-'): a vfnmsub.ps intrinsic reference.
// New expectation ('+'): two fsubs from -0.0 vectors bound to NEG and NEG2, then a vfmadd.ps
// call with NEG as first operand and NEG2 as third operand.
__m128 test_mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
- // CHECK: @llvm.x86.fma.vfnmsub.ps
+ // CHECK-LABEL: test_mm_fnmsub_ps
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]])
return _mm_fnmsub_ps(a, b, c);
}
// Calls _mm_fnmsub_pd. Old expectation ('-'): a vfnmsub.pd intrinsic reference.
// New expectation ('+'): two fsubs from -0.0 vectors bound to NEG and NEG2, then a vfmadd.pd
// call with NEG as first operand and NEG2 as third operand.
__m128d test_mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) {
- // CHECK: @llvm.x86.fma.vfnmsub.pd
+ // CHECK-LABEL: test_mm_fnmsub_pd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]])
return _mm_fnmsub_pd(a, b, c);
}
// Calls _mm_fnmsub_ss; the expectation is still a reference to @llvm.x86.fma.vfnmsub.ss.
// The '+' line adds a label directive scoping that expectation to this function.
__m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) {
+ // CHECK-LABEL: test_mm_fnmsub_ss
// CHECK: @llvm.x86.fma.vfnmsub.ss
return _mm_fnmsub_ss(a, b, c);
}
// Calls _mm_fnmsub_sd; the expectation is still a reference to @llvm.x86.fma.vfnmsub.sd.
// The '+' line adds a label directive scoping that expectation to this function.
__m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
+ // CHECK-LABEL: test_mm_fnmsub_sd
// CHECK: @llvm.x86.fma.vfnmsub.sd
return _mm_fnmsub_sd(a, b, c);
}
// Calls _mm_fmaddsub_ps; the expectation is a reference to @llvm.x86.fma.vfmaddsub.ps.
// The '+' line adds a label directive scoping that expectation to this function.
__m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) {
+ // CHECK-LABEL: test_mm_fmaddsub_ps
// CHECK: @llvm.x86.fma.vfmaddsub.ps
return _mm_fmaddsub_ps(a, b, c);
}
// Calls _mm_fmaddsub_pd; the expectation is a reference to @llvm.x86.fma.vfmaddsub.pd.
// The '+' line adds a label directive scoping that expectation to this function.
__m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) {
+ // CHECK-LABEL: test_mm_fmaddsub_pd
// CHECK: @llvm.x86.fma.vfmaddsub.pd
return _mm_fmaddsub_pd(a, b, c);
}
// Calls _mm_fmsubadd_ps. Old expectation ('-'): a vfmsubadd.ps intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmaddsub.ps call
// whose third operand is NEG.
__m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) {
- // CHECK: @llvm.x86.fma.vfmsubadd.ps
+ // CHECK-LABEL: test_mm_fmsubadd_ps
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
return _mm_fmsubadd_ps(a, b, c);
}
// Calls _mm_fmsubadd_pd. Old expectation ('-'): a vfmsubadd.pd intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmaddsub.pd call
// whose third operand is NEG.
__m128d test_mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) {
- // CHECK: @llvm.x86.fma.vfmsubadd.pd
+ // CHECK-LABEL: test_mm_fmsubadd_pd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
return _mm_fmsubadd_pd(a, b, c);
}
// Calls _mm256_fmadd_ps; the expectation is a reference to @llvm.x86.fma.vfmadd.ps.256.
// The '+' line adds a label directive scoping that expectation to this function.
__m256 test_mm256_fmadd_ps(__m256 a, __m256 b, __m256 c) {
+ // CHECK-LABEL: test_mm256_fmadd_ps
// CHECK: @llvm.x86.fma.vfmadd.ps.256
return _mm256_fmadd_ps(a, b, c);
}
// Calls _mm256_fmadd_pd; the expectation is a reference to @llvm.x86.fma.vfmadd.pd.256.
// The '+' line adds a label directive scoping that expectation to this function.
__m256d test_mm256_fmadd_pd(__m256d a, __m256d b, __m256d c) {
+ // CHECK-LABEL: test_mm256_fmadd_pd
// CHECK: @llvm.x86.fma.vfmadd.pd.256
return _mm256_fmadd_pd(a, b, c);
}
// Calls _mm256_fmsub_ps. Old expectation ('-'): a vfmsub.ps.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.ps.256 call
// whose third operand is NEG. Operand patterns use .+ (not .*) so an empty value name
// cannot match, consistent with the sibling tests.
__m256 test_mm256_fmsub_ps(__m256 a, __m256 b, __m256 c) {
- // CHECK: @llvm.x86.fma.vfmsub.ps.256
+ // CHECK-LABEL: test_mm256_fmsub_ps
+ // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> %{{.+}}, <8 x float> %{{.+}}, <8 x float> [[NEG]])
return _mm256_fmsub_ps(a, b, c);
}
// Calls _mm256_fmsub_pd. Old expectation ('-'): a vfmsub.pd.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.pd.256 call
// whose third operand is NEG.
__m256d test_mm256_fmsub_pd(__m256d a, __m256d b, __m256d c) {
- // CHECK: @llvm.x86.fma.vfmsub.pd.256
+ // CHECK-LABEL: test_mm256_fmsub_pd
+ // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]])
return _mm256_fmsub_pd(a, b, c);
}
// Calls _mm256_fnmadd_ps. Old expectation ('-'): a vfnmadd.ps.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.ps.256 call
// whose first operand is NEG. Operand patterns use .+ (not .*) so an empty value name
// cannot match, consistent with the sibling tests.
__m256 test_mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c) {
- // CHECK: @llvm.x86.fma.vfnmadd.ps.256
+ // CHECK-LABEL: test_mm256_fnmadd_ps
+ // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> %{{.+}})
return _mm256_fnmadd_ps(a, b, c);
}
// Calls _mm256_fnmadd_pd. Old expectation ('-'): a vfnmadd.pd.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.pd.256 call
// whose first operand is NEG.
__m256d test_mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c) {
- // CHECK: @llvm.x86.fma.vfnmadd.pd.256
+ // CHECK-LABEL: test_mm256_fnmadd_pd
+ // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> %{{.+}})
return _mm256_fnmadd_pd(a, b, c);
}
// Calls _mm256_fnmsub_ps. Old expectation ('-'): a vfnmsub.ps.256 intrinsic reference.
// New expectation ('+'): two fsubs from -0.0 vectors bound to NEG and NEG2, then a
// vfmadd.ps.256 call with NEG as first operand and NEG2 as third operand. Operand patterns
// use .+ (not .*) so an empty value name cannot match, consistent with the sibling tests.
__m256 test_mm256_fnmsub_ps(__m256 a, __m256 b, __m256 c) {
- // CHECK: @llvm.x86.fma.vfnmsub.ps.256
+ // CHECK-LABEL: test_mm256_fnmsub_ps
+ // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> [[NEG2]])
return _mm256_fnmsub_ps(a, b, c);
}
// Calls _mm256_fnmsub_pd. Old expectation ('-'): a vfnmsub.pd.256 intrinsic reference.
// New expectation ('+'): two fsubs from -0.0 vectors bound to NEG and NEG2, then a
// vfmadd.pd.256 call with NEG as first operand and NEG2 as third operand.
__m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) {
- // CHECK: @llvm.x86.fma.vfnmsub.pd.256
+ // CHECK-LABEL: test_mm256_fnmsub_pd
+ // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> [[NEG2]])
return _mm256_fnmsub_pd(a, b, c);
}
// Calls _mm256_fmaddsub_ps; the expectation is a reference to @llvm.x86.fma.vfmaddsub.ps.256.
// The '+' line adds a label directive scoping that expectation to this function.
__m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) {
+ // CHECK-LABEL: test_mm256_fmaddsub_ps
// CHECK: @llvm.x86.fma.vfmaddsub.ps.256
return _mm256_fmaddsub_ps(a, b, c);
}
// Calls _mm256_fmaddsub_pd; the expectation is a reference to @llvm.x86.fma.vfmaddsub.pd.256.
// The '+' line adds a label directive scoping that expectation to this function.
__m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) {
+ // CHECK-LABEL: test_mm256_fmaddsub_pd
// CHECK: @llvm.x86.fma.vfmaddsub.pd.256
return _mm256_fmaddsub_pd(a, b, c);
}
// Calls _mm256_fmsubadd_ps. Old expectation ('-'): a vfmsubadd.ps.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmaddsub.ps.256
// call whose third operand is NEG. Operand patterns use .+ (not .*) so an empty value name
// cannot match, consistent with the sibling tests.
__m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) {
- // CHECK: @llvm.x86.fma.vfmsubadd.ps.256
+ // CHECK-LABEL: test_mm256_fmsubadd_ps
+ // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.+}}, <8 x float> %{{.+}}, <8 x float> [[NEG]])
return _mm256_fmsubadd_ps(a, b, c);
}
// Calls _mm256_fmsubadd_pd. Old expectation ('-'): a vfmsubadd.pd.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmaddsub.pd.256
// call whose third operand is NEG.
__m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) {
- // CHECK: @llvm.x86.fma.vfmsubadd.pd.256
+ // CHECK-LABEL: test_mm256_fmsubadd_pd
+ // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]])
return _mm256_fmsubadd_pd(a, b, c);
}
// Calls _mm_msub_ps. Old expectation ('-'): a vfmsub.ps intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.ps call
// whose third operand is NEG.
__m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_msub_ps
- // CHECK: @llvm.x86.fma.vfmsub.ps
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
return _mm_msub_ps(a, b, c);
}
// Calls _mm_msub_pd. Old expectation ('-'): a vfmsub.pd intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.pd call
// whose third operand is NEG.
__m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_msub_pd
- // CHECK: @llvm.x86.fma.vfmsub.pd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
return _mm_msub_pd(a, b, c);
}
// Calls _mm_nmacc_ps. Old expectation ('-'): a vfnmadd.ps intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.ps call
// whose first operand is NEG.
__m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmacc_ps
- // CHECK: @llvm.x86.fma.vfnmadd.ps
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}})
return _mm_nmacc_ps(a, b, c);
}
// Calls _mm_nmacc_pd. Old expectation ('-'): a vfnmadd.pd intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.pd call
// whose first operand is NEG.
__m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_nmacc_pd
- // CHECK: @llvm.x86.fma.vfnmadd.pd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}})
return _mm_nmacc_pd(a, b, c);
}
// Calls _mm_nmsub_ps. Old expectation ('-'): a vfnmsub.ps intrinsic reference.
// New expectation ('+'): two fsubs from -0.0 vectors bound to NEG and NEG2, then a vfmadd.ps
// call with NEG as first operand and NEG2 as third operand.
__m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmsub_ps
- // CHECK: @llvm.x86.fma.vfnmsub.ps
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]])
return _mm_nmsub_ps(a, b, c);
}
// Calls _mm_nmsub_pd. Old expectation ('-'): a vfnmsub.pd intrinsic reference.
// New expectation ('+'): two fsubs from -0.0 vectors bound to NEG and NEG2, then a vfmadd.pd
// call with NEG as first operand and NEG2 as third operand.
__m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_nmsub_pd
- // CHECK: @llvm.x86.fma.vfnmsub.pd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]])
return _mm_nmsub_pd(a, b, c);
}
// Calls _mm_msubadd_ps. Old expectation ('-'): a vfmsubadd.ps intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmaddsub.ps call
// whose third operand is NEG.
__m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_msubadd_ps
- // CHECK: @llvm.x86.fma.vfmsubadd.ps
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
return _mm_msubadd_ps(a, b, c);
}
// Calls _mm_msubadd_pd. Old expectation ('-'): a vfmsubadd.pd intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmaddsub.pd call
// whose third operand is NEG.
__m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_msubadd_pd
- // CHECK: @llvm.x86.fma.vfmsubadd.pd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
return _mm_msubadd_pd(a, b, c);
}
// Calls _mm256_msub_ps. Old expectation ('-'): a vfmsub.ps.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.ps.256 call
// whose third operand is NEG. Operand patterns use .+ (not .*) so an empty value name
// cannot match, consistent with the sibling tests.
__m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) {
// CHECK-LABEL: test_mm256_msub_ps
- // CHECK: @llvm.x86.fma.vfmsub.ps.256
+ // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> %{{.+}}, <8 x float> %{{.+}}, <8 x float> [[NEG]])
return _mm256_msub_ps(a, b, c);
}
// Calls _mm256_msub_pd. Old expectation ('-'): a vfmsub.pd.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.pd.256 call
// whose third operand is NEG.
__m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) {
// CHECK-LABEL: test_mm256_msub_pd
- // CHECK: @llvm.x86.fma.vfmsub.pd.256
+ // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]])
return _mm256_msub_pd(a, b, c);
}
// Calls _mm256_nmacc_ps. Old expectation ('-'): a vfnmadd.ps.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.ps.256 call
// whose first operand is NEG. Operand patterns use .+ (not .*) so an empty value name
// cannot match, consistent with the sibling tests.
__m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) {
// CHECK-LABEL: test_mm256_nmacc_ps
- // CHECK: @llvm.x86.fma.vfnmadd.ps.256
+ // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> %{{.+}})
return _mm256_nmacc_ps(a, b, c);
}
// Calls _mm256_nmacc_pd. Old expectation ('-'): a vfnmadd.pd.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmadd.pd.256 call
// whose first operand is NEG.
__m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) {
// CHECK-LABEL: test_mm256_nmacc_pd
- // CHECK: @llvm.x86.fma.vfnmadd.pd.256
+ // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> %{{.+}})
return _mm256_nmacc_pd(a, b, c);
}
// Calls _mm256_nmsub_ps. Old expectation ('-'): a vfnmsub.ps.256 intrinsic reference.
// New expectation ('+'): two fsubs from -0.0 vectors bound to NEG and NEG2, then a
// vfmadd.ps.256 call with NEG as first operand and NEG2 as third operand. Operand patterns
// use .+ (not .*) so an empty value name cannot match, consistent with the sibling tests.
__m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) {
// CHECK-LABEL: test_mm256_nmsub_ps
- // CHECK: @llvm.x86.fma.vfnmsub.ps.256
+ // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> [[NEG2]])
return _mm256_nmsub_ps(a, b, c);
}
// Calls _mm256_nmsub_pd. Old expectation ('-'): a vfnmsub.pd.256 intrinsic reference.
// New expectation ('+'): two fsubs from -0.0 vectors bound to NEG and NEG2, then a
// vfmadd.pd.256 call with NEG as first operand and NEG2 as third operand.
__m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) {
// CHECK-LABEL: test_mm256_nmsub_pd
- // CHECK: @llvm.x86.fma.vfnmsub.pd.256
+ // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> [[NEG2]])
return _mm256_nmsub_pd(a, b, c);
}
// Calls _mm256_msubadd_ps. Old expectation ('-'): a vfmsubadd.ps.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmaddsub.ps.256
// call whose third operand is NEG. Operand patterns use .+ (not .*) so an empty value name
// cannot match, consistent with the sibling tests.
__m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) {
// CHECK-LABEL: test_mm256_msubadd_ps
- // CHECK: @llvm.x86.fma.vfmsubadd.ps.256
+ // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.+}}, <8 x float> %{{.+}}, <8 x float> [[NEG]])
return _mm256_msubadd_ps(a, b, c);
}
// Calls _mm256_msubadd_pd. Old expectation ('-'): a vfmsubadd.pd.256 intrinsic reference.
// New expectation ('+'): an fsub from a -0.0 vector bound to NEG, then a vfmaddsub.pd.256
// call whose third operand is NEG.
__m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) {
// CHECK-LABEL: test_mm256_msubadd_pd
- // CHECK: @llvm.x86.fma.vfmsubadd.pd.256
+ // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]])
return _mm256_msubadd_pd(a, b, c);
}