static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddss3(__W, __A, __B),
 * keep its element 0 only when bit 0 of __U is set (otherwise keep __W[0]),
 * and return __W. Added (+) lines: pass the same operands plus __U and
 * _MM_FROUND_CUR_DIRECTION to __builtin_ia32_vfmaddss3_mask. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B);
- __W[0] = (__U & 1) ? __Z[0] : __W[0];
- return __W;
+ return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
+ (__v4sf)__A,
+ (__v4sf)__B,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_fmadd_round_ss(A, B, C, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddss3(__A, __B, __C),
 * store its element 0 into __A[0] when bit 0 of __U is set (otherwise store
 * 0), and return __A. Added (+) lines: pass the same operands plus __U and
 * _MM_FROUND_CUR_DIRECTION to __builtin_ia32_vfmaddss3_maskz. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C);
- __A[0] = (__U & 1) ? __Z[0] : 0;
- return __A;
+ return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
+ (__v4sf)__B,
+ (__v4sf)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddss3(__W, __X, __Y),
 * keep its element 0 only when bit 0 of __U is set (otherwise keep __Y[0]),
 * and return __Y. Added (+) lines: pass the same operands plus __U and
 * _MM_FROUND_CUR_DIRECTION to __builtin_ia32_vfmaddss3_mask3. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
- (__v4sf) __X,
- (__v4sf) __Y);
- __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
- return __Y;
+ return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
+ (__v4sf)__X,
+ (__v4sf)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
/* Patch hunk. Both versions negate the third operand (__B). Removed (-)
 * lines: call __builtin_ia32_vfmaddss3(__W, __A, -__B) and select element 0
 * by bit 0 of __U, falling back to __W[0]; __W is returned. Added (+) lines:
 * pass __W, __A, -__B, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddss3_mask. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
- (__v4sf) __A,
- -(__v4sf) __B);
- __W[0] = (__U & 1) ? __Z[0] : __W[0];
- return __W;
+ return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
+ (__v4sf)__A,
+ -(__v4sf)__B,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_fmsub_round_ss(A, B, C, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
/* Patch hunk. Both versions negate the third operand (__C). Removed (-)
 * lines: call __builtin_ia32_vfmaddss3(__A, __B, -__C) and store its element
 * 0 into __A[0] when bit 0 of __U is set, otherwise 0; __A is returned.
 * Added (+) lines: pass __A, __B, -__C, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddss3_maskz. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C);
- __A[0] = (__U & 1) ? __Z[0] : 0;
- return __A;
+ return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
+ (__v4sf)__B,
+ -(__v4sf)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddss3(__W, __X,
 * -__Y), select element 0 by bit 0 of __U with __Y[0] as the fallback, and
 * return __Y. Added (+) lines: switch to the dedicated
 * __builtin_ia32_vfmsubss3_mask3 builtin and pass __Y un-negated, together
 * with __U and _MM_FROUND_CUR_DIRECTION. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
- (__v4sf) __X,
- -(__v4sf) __Y);
- __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
- return __Y;
+ return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
+ (__v4sf)__X,
+ (__v4sf)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
/* Patch hunk. Both versions negate the second operand (__A). Removed (-)
 * lines: call __builtin_ia32_vfmaddss3(__W, -__A, __B) and select element 0
 * by bit 0 of __U, falling back to __W[0]; __W is returned. Added (+) lines:
 * pass __W, -__A, __B, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddss3_mask. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B);
- __W[0] = (__U & 1) ? __Z[0] : __W[0];
- return __W;
+ return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
+ -(__v4sf)__A,
+ (__v4sf)__B,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_fnmadd_round_ss(A, B, C, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
/* Patch hunk. Both versions negate the second operand (__B). Removed (-)
 * lines: call __builtin_ia32_vfmaddss3(__A, -__B, __C) and store its element
 * 0 into __A[0] when bit 0 of __U is set, otherwise 0; __A is returned.
 * Added (+) lines: pass __A, -__B, __C, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddss3_maskz. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
- -(__v4sf) __B,
- (__v4sf) __C);
- __A[0] = (__U & 1) ? __Z[0] : 0;
- return __A;
+ return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
+ -(__v4sf)__B,
+ (__v4sf)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
/* Patch hunk. Both versions negate the second operand (__X). Removed (-)
 * lines: call __builtin_ia32_vfmaddss3(__W, -__X, __Y) and select element 0
 * by bit 0 of __U, falling back to __Y[0]; __Y is returned. Added (+) lines:
 * pass __W, -__X, __Y, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddss3_mask3. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
- -(__v4sf) __X,
- (__v4sf) __Y);
- __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
- return __Y;
+ return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
+ -(__v4sf)__X,
+ (__v4sf)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
/* Patch hunk. Both versions negate the second and third operands (__A and
 * __B). Removed (-) lines: call __builtin_ia32_vfmaddss3(__W, -__A, -__B) and
 * select element 0 by bit 0 of __U, falling back to __W[0]; __W is returned.
 * Added (+) lines: pass __W, -__A, -__B, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddss3_mask. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
- -(__v4sf) __A,
- -(__v4sf) __B);
- __W[0] = (__U & 1) ? __Z[0] : __W[0];
- return __W;
+ return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
+ -(__v4sf)__A,
+ -(__v4sf)__B,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_fnmsub_round_ss(A, B, C, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
/* Patch hunk. Both versions negate the second and third operands (__B and
 * __C). Removed (-) lines: call __builtin_ia32_vfmaddss3(__A, -__B, -__C) and
 * store its element 0 into __A[0] when bit 0 of __U is set, otherwise 0;
 * __A is returned. Added (+) lines: pass __A, -__B, -__C, __U and
 * _MM_FROUND_CUR_DIRECTION to __builtin_ia32_vfmaddss3_maskz. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
- -(__v4sf) __B,
- -(__v4sf) __C);
- __A[0] = (__U & 1) ? __Z[0] : 0;
- return __A;
+ return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
+ -(__v4sf)__B,
+ -(__v4sf)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddss3(__W, -__X,
 * -__Y), select element 0 by bit 0 of __U with __Y[0] as the fallback, and
 * return __Y. Added (+) lines: switch to __builtin_ia32_vfmsubss3_mask3,
 * still negating __X but passing __Y un-negated, together with __U and
 * _MM_FROUND_CUR_DIRECTION. */
- __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
- -(__v4sf) __X,
- -(__v4sf) __Y);
- __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
- return __Y;
+ return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
+ -(__v4sf)__X,
+ (__v4sf)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddsd3(__W, __A, __B),
 * keep its element 0 only when bit 0 of __U is set (otherwise keep __W[0]),
 * and return __W. Added (+) lines: pass the same operands plus __U and
 * _MM_FROUND_CUR_DIRECTION to __builtin_ia32_vfmaddsd3_mask. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B);
- __W[0] = (__U & 1) ? __Z[0] : __W[0];
- return __W;
+ return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
+ (__v2df)__A,
+ (__v2df)__B,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_fmadd_round_sd(A, B, C, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddsd3(__A, __B, __C),
 * store its element 0 into __A[0] when bit 0 of __U is set (otherwise store
 * 0), and return __A. Added (+) lines: pass the same operands plus __U and
 * _MM_FROUND_CUR_DIRECTION to __builtin_ia32_vfmaddsd3_maskz. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C);
- __A[0] = (__U & 1) ? __Z[0] : 0;
- return __A;
+ return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
+ (__v2df)__B,
+ (__v2df)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddsd3(__W, __X, __Y),
 * keep its element 0 only when bit 0 of __U is set (otherwise keep __Y[0]),
 * and return __Y. Added (+) lines: pass the same operands plus __U and
 * _MM_FROUND_CUR_DIRECTION to __builtin_ia32_vfmaddsd3_mask3. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
- (__v2df) __X,
- (__v2df) __Y);
- __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
- return __Y;
+ return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
+ (__v2df)__X,
+ (__v2df)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
/* Patch hunk. Both versions negate the third operand (__B). Removed (-)
 * lines: call __builtin_ia32_vfmaddsd3(__W, __A, -__B) and select element 0
 * by bit 0 of __U, falling back to __W[0]; __W is returned. Added (+) lines:
 * pass __W, __A, -__B, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddsd3_mask. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
- (__v2df) __A,
- -(__v2df) __B);
- __W[0] = (__U & 1) ? __Z[0] : __W[0];
- return __W;
+ return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
+ (__v2df)__A,
+ -(__v2df)__B,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_fmsub_round_sd(A, B, C, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
/* Patch hunk. Both versions negate the third operand (__C). Removed (-)
 * lines: call __builtin_ia32_vfmaddsd3(__A, __B, -__C) and store its element
 * 0 into __A[0] when bit 0 of __U is set, otherwise 0; __A is returned.
 * Added (+) lines: pass __A, __B, -__C, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddsd3_maskz. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C);
- __A[0] = (__U & 1) ? __Z[0] : 0;
- return __A;
+ return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
+ (__v2df)__B,
+ -(__v2df)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddsd3(__W, __X,
 * -__Y), select element 0 by bit 0 of __U with __Y[0] as the fallback, and
 * return __Y. Added (+) lines: switch to the dedicated
 * __builtin_ia32_vfmsubsd3_mask3 builtin and pass __Y un-negated, together
 * with __U and _MM_FROUND_CUR_DIRECTION. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
- (__v2df) __X,
- -(__v2df) __Y);
- __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
- return __Y;
+ return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
+ (__v2df)__X,
+ (__v2df)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
/* Patch hunk. Both versions negate the second operand (__A). Removed (-)
 * lines: call __builtin_ia32_vfmaddsd3(__W, -__A, __B) and select element 0
 * by bit 0 of __U, falling back to __W[0]; __W is returned. Added (+) lines:
 * pass __W, -__A, __B, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddsd3_mask. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B);
- __W[0] = (__U & 1) ? __Z[0] : __W[0];
- return __W;
+ return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
+ -(__v2df)__A,
+ (__v2df)__B,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_fnmadd_round_sd(A, B, C, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
/* Patch hunk. Both versions negate the second operand (__B). Removed (-)
 * lines: call __builtin_ia32_vfmaddsd3(__A, -__B, __C) and store its element
 * 0 into __A[0] when bit 0 of __U is set, otherwise 0; __A is returned.
 * Added (+) lines: pass __A, -__B, __C, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddsd3_maskz. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
- -(__v2df) __B,
- (__v2df) __C);
- __A[0] = (__U & 1) ? __Z[0] : 0;
- return __A;
+ return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
+ -(__v2df)__B,
+ (__v2df)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
/* Patch hunk. Both versions negate the second operand (__X). Removed (-)
 * lines: call __builtin_ia32_vfmaddsd3(__W, -__X, __Y) and select element 0
 * by bit 0 of __U, falling back to __Y[0]; __Y is returned. Added (+) lines:
 * pass __W, -__X, __Y, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddsd3_mask3. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
- -(__v2df) __X,
- (__v2df) __Y);
- __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
- return __Y;
+ return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
+ -(__v2df)__X,
+ (__v2df)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
/* Patch hunk. Both versions negate the second and third operands (__A and
 * __B). Removed (-) lines: call __builtin_ia32_vfmaddsd3(__W, -__A, -__B) and
 * select element 0 by bit 0 of __U, falling back to __W[0]; __W is returned.
 * Added (+) lines: pass __W, -__A, -__B, __U and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_vfmaddsd3_mask. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
- -(__v2df) __A,
- -(__v2df) __B);
- __W[0] = (__U & 1) ? __Z[0] : __W[0];
- return __W;
+ return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
+ -(__v2df)__A,
+ -(__v2df)__B,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_fnmsub_round_sd(A, B, C, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
/* Patch hunk. Both versions negate the second and third operands (__B and
 * __C). Removed (-) lines: call __builtin_ia32_vfmaddsd3(__A, -__B, -__C) and
 * store its element 0 into __A[0] when bit 0 of __U is set, otherwise 0;
 * __A is returned. Added (+) lines: pass __A, -__B, -__C, __U and
 * _MM_FROUND_CUR_DIRECTION to __builtin_ia32_vfmaddsd3_maskz. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
- -(__v2df) __B,
- -(__v2df) __C);
- __A[0] = (__U & 1) ? __Z[0] : 0;
- return __A;
+ return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
+ -(__v2df)__B,
+ -(__v2df)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
/* Patch hunk. Removed (-) lines: call __builtin_ia32_vfmaddsd3(__W, -__X,
 * -__Y), select element 0 by bit 0 of __U with __Y[0] as the fallback, and
 * return __Y. Added (+) lines: switch to __builtin_ia32_vfmsubsd3_mask3,
 * still negating __X but passing __Y un-negated, together with __U and
 * _MM_FROUND_CUR_DIRECTION. */
- __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
- -(__v2df) __X,
- -(__v2df) __Y);
- __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
- return __Y;
+ return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
+ -(__v2df)__X,
+ (__v2df)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
// Patch hunk for the _mm_mask_fmadd_ss IR test. The removed (-) expectations
// matched a zext/and/icmp/br/phi sequence after the scalar llvm.fma.f32 call.
// The added (+) expectations instead require, on consecutive IR lines: three
// element-0 extracts, the llvm.fma.f32 call, a bitcast of the i8 mask to
// <8 x i1>, an extract of mask bit 0, a select between the FMA result and
// element 0 of the first operand, and an insertelement into that operand.
__m128 test_mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fmadd_ss
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fmadd_ss(__W, __U, __A, __B);
}
// Patch hunk for the _mm_maskz_fmadd_ss IR test. The removed (-) expectations
// matched a zext/and/icmp/br/phi sequence after the scalar llvm.fma.f32 call.
// The added (+) expectations require consecutive IR lines ending in a select
// between the FMA result and the constant 0.0, keyed on bit 0 of the mask
// bitcast to <8 x i1>, then an insertelement into the first vector operand.
__m128 test_mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fmadd_ss
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fmadd_ss(__U, __A, __B, __C);
}
// Patch hunk for the _mm_mask3_fmadd_ss IR test. The removed (-) expectations
// matched a zext/and/icmp/br/phi sequence after the scalar llvm.fma.f32 call.
// The added (+) expectations require consecutive IR lines ending in a select
// between the FMA result and element 0 of the third vector operand, keyed on
// bit 0 of the mask, then an insertelement into that third operand.
__m128 test_mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmadd_ss
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fmadd_ss(__W, __X, __Y, __U);
}
// Patch hunk for the _mm_mask_fmsub_ss IR test. Old and new expectations both
// start with a vector fsub from -0.0 (the negation). The removed (-) lines
// then matched a zext/and/icmp/br/phi sequence; the added (+) lines require
// the third FMA input to be element 0 of that negated vector, followed by a
// select between the FMA result and element 0 of the first operand and an
// insertelement into that operand.
__m128 test_mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fmsub_ss
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fmsub_ss(__W, __U, __A, __B);
}
// Patch hunk for the _mm_maskz_fmsub_ss IR test. Old and new expectations
// both start with a vector fsub from -0.0 (the negation). The removed (-)
// lines then matched a zext/and/icmp/br/phi sequence; the added (+) lines
// require the third FMA input to be element 0 of that negated vector,
// followed by a select between the FMA result and the constant 0.0 and an
// insertelement into the first vector operand.
__m128 test_mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fmsub_ss
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fmsub_ss(__U, __A, __B, __C);
}
// Patch hunk for the _mm_mask3_fmsub_ss IR test. The removed (-) expectations
// matched an fsub negation, the scalar FMA, and a zext/and/icmp/br/phi
// sequence. The added (+) expectations capture the operand being negated,
// feed element 0 of the negated vector to llvm.fma.f32, then re-extract
// element 0 of the un-negated operand as the select fallback and insert the
// selected value into that un-negated operand.
__m128 test_mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmsub_ss
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fmsub_ss(__W, __X, __Y, __U);
}
// Patch hunk for the _mm_mask_fnmadd_ss IR test. Old and new expectations
// both start with a vector fsub from -0.0 (the negation). The removed (-)
// lines then matched a zext/and/icmp/br/phi sequence; the added (+) lines
// require the second FMA input to be element 0 of that negated vector,
// followed by a select between the FMA result and element 0 of the first
// operand and an insertelement into that operand.
__m128 test_mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fnmadd_ss
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fnmadd_ss(__W, __U, __A, __B);
}
// Patch hunk for the _mm_maskz_fnmadd_ss IR test. Old and new expectations
// both start with a vector fsub from -0.0 (the negation). The removed (-)
// lines then matched a zext/and/icmp/br/phi sequence; the added (+) lines
// require the second FMA input to be element 0 of that negated vector,
// followed by a select between the FMA result and the constant 0.0 and an
// insertelement into the first vector operand.
__m128 test_mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fnmadd_ss
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fnmadd_ss(__U, __A, __B, __C);
}
// Patch hunk for the _mm_mask3_fnmadd_ss IR test. Old and new expectations
// both start with a vector fsub from -0.0 (the negation). The removed (-)
// lines then matched a zext/and/icmp/br/phi sequence; the added (+) lines
// require the second FMA input to be element 0 of that negated vector,
// followed by a select between the FMA result and element 0 of the third
// operand and an insertelement into that third operand.
__m128 test_mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmadd_ss
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fnmadd_ss(__W, __X, __Y, __U);
}
// Patch hunk for the _mm_mask_fnmsub_ss IR test. Old and new expectations
// both start with two vector fsubs from -0.0 (two negations). The removed (-)
// lines then matched a zext/and/icmp/br/phi sequence; the added (+) lines
// require the second and third FMA inputs to be element 0 of the two negated
// vectors, followed by a select between the FMA result and element 0 of the
// first operand and an insertelement into that operand.
__m128 test_mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fnmsub_ss
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fnmsub_ss(__W, __U, __A, __B);
}
// Patch hunk for the _mm_maskz_fnmsub_ss IR test. Old and new expectations
// both start with two vector fsubs from -0.0 (two negations). The removed (-)
// lines then matched a zext/and/icmp/br/phi sequence; the added (+) lines
// require the second and third FMA inputs to be element 0 of the two negated
// vectors, followed by a select between the FMA result and the constant 0.0
// and an insertelement into the first vector operand.
__m128 test_mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fnmsub_ss
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fnmsub_ss(__U, __A, __B, __C);
}
// Patch hunk for the _mm_mask3_fnmsub_ss IR test. Old and new expectations
// both start with two vector fsubs from -0.0 (two negations). The removed (-)
// lines then matched a zext/and/icmp/br/phi sequence. The added (+) lines
// capture the operand of the second negation, feed element 0 of both negated
// vectors to llvm.fma.f32, then re-extract element 0 of the un-negated
// operand as the select fallback and insert the selected value into it.
__m128 test_mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmsub_ss
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi float
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fnmsub_ss(__W, __X, __Y, __U);
}
// Patch hunk for the _mm_mask_fmadd_sd IR test. The removed (-) expectations
// matched a zext/and/icmp/br/phi sequence after the scalar llvm.fma.f64 call.
// The added (+) expectations instead require, on consecutive IR lines: three
// element-0 extracts from <2 x double> vectors, the llvm.fma.f64 call, a
// bitcast of the i8 mask to <8 x i1>, an extract of mask bit 0, a select
// between the FMA result and element 0 of the first operand, and an
// insertelement into that operand.
__m128d test_mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fmadd_sd
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fmadd_sd(__W, __U, __A, __B);
}
__m128d test_mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fmadd_sd
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fmadd_sd(__U, __A, __B, __C);
}
__m128d test_mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmadd_sd
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fmadd_sd(__W, __X, __Y, __U);
}
__m128d test_mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fmsub_sd
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fmsub_sd(__W, __U, __A, __B);
}
__m128d test_mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fmsub_sd
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fmsub_sd(__U, __A, __B, __C);
}
__m128d test_mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmsub_sd
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fmsub_sd(__W, __X, __Y, __U);
}
__m128d test_mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fnmadd_sd
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fnmadd_sd(__W, __U, __A, __B);
}
__m128d test_mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fnmadd_sd
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fnmadd_sd(__U, __A, __B, __C);
}
__m128d test_mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmadd_sd
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fnmadd_sd(__W, __X, __Y, __U);
}
__m128d test_mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fnmsub_sd
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fnmsub_sd(__W, __U, __A, __B);
}
__m128d test_mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fnmsub_sd
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fnmsub_sd(__U, __A, __B, __C);
}
__m128d test_mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmsub_sd
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
- // CHECK: zext i8 %{{.*}} to i32
- // CHECK: and i32 %{{.*}}, 1
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: br label %{{.*}}
-
- // CHECK: phi double
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]])
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fnmsub_sd(__W, __X, __Y, __U);
}