// FMA
TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "", "fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "", "fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
__m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fmsub_ss
- // CHECK: @llvm.x86.fma.vfmsub.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
return _mm_fmsub_ss(a, b, c);
}
__m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fmsub_sd
- // CHECK: @llvm.x86.fma.vfmsub.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
return _mm_fmsub_sd(a, b, c);
}
__m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fnmadd_ss
- // CHECK: @llvm.x86.fma.vfnmadd.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> %{{.+}})
return _mm_fnmadd_ss(a, b, c);
}
__m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fnmadd_sd
- // CHECK: @llvm.x86.fma.vfnmadd.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> %{{.+}})
return _mm_fnmadd_sd(a, b, c);
}
__m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fnmsub_ss
- // CHECK: @llvm.x86.fma.vfnmsub.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> [[NEG2]])
return _mm_fnmsub_ss(a, b, c);
}
__m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fnmsub_sd
- // CHECK: @llvm.x86.fma.vfnmsub.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> [[NEG2]])
return _mm_fnmsub_sd(a, b, c);
}
__m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_macc_ss
- // CHECK: @llvm.x86.fma.vfmadd.ss
+ // CHECK: @llvm.x86.fma4.vfmadd.ss
return _mm_macc_ss(a, b, c);
}
__m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_macc_sd
- // CHECK: @llvm.x86.fma.vfmadd.sd
+ // CHECK: @llvm.x86.fma4.vfmadd.sd
return _mm_macc_sd(a, b, c);
}
__m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_msub_ss
- // CHECK: @llvm.x86.fma.vfmsub.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
return _mm_msub_ss(a, b, c);
}
__m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_msub_sd
- // CHECK: @llvm.x86.fma.vfmsub.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
return _mm_msub_sd(a, b, c);
}
__m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmacc_ss
- // CHECK: @llvm.x86.fma.vfnmadd.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}})
return _mm_nmacc_ss(a, b, c);
}
__m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_nmacc_sd
- // CHECK: @llvm.x86.fma.vfnmadd.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}})
return _mm_nmacc_sd(a, b, c);
}
__m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmsub_ss
- // CHECK: @llvm.x86.fma.vfnmsub.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]])
return _mm_nmsub_ss(a, b, c);
}
__m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_nmsub_sd
- // CHECK: @llvm.x86.fma.vfnmsub.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]])
return _mm_nmsub_sd(a, b, c);
}