// Unmasked form. Expected IR: lane 0 of each <4 x float> operand is extracted, passed to @llvm.x86.avx512.vfmadd.f32 with rounding immediate i32 8, and the result is inserted into lane 0 of the vector that supplied the first call argument.
__m128 test_mm_fmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_fmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0
return _mm_fmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask form. Expected IR: the same scalar vfmadd.f32 call as the unmasked test, then the i8 mask is bitcast to <8 x i1>, element 0 is extracted, and a select between the FMA result and the first call argument yields the scalar inserted into lane 0 of that argument's source vector.
__m128 test_mm_mask_fmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Maskz form. Expected IR: scalar vfmadd.f32 call on lane 0 of the three operands, then the i8 mask is bitcast to <8 x i1>, element 0 is extracted, and a select between the FMA result and 0.0 yields the scalar inserted into lane 0 of the first call argument's source vector.
__m128 test_mm_maskz_fmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fmadd_round_ss
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask3 form. Expected IR: scalar vfmadd.f32 call on lane 0 of the three operands, then a select between the FMA result and the third call argument; the selected scalar is inserted into lane 0 of the third argument's source vector.
__m128 test_mm_mask3_fmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Unmasked fmsub. Expected IR: one operand vector is negated by an fsub from a -0.0 vector and its lane 0 becomes the third argument of the scalar vfmadd.f32 call; the result is inserted into lane 0 of the first argument's source vector.
__m128 test_mm_fmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_fmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0
return _mm_fmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask fmsub. Expected IR: the third call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f32 call, a select between the FMA result and the first call argument yields the scalar inserted into lane 0 of the first argument's source vector.
__m128 test_mm_mask_fmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Maskz fmsub. Expected IR: the third call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f32 call, a select between the FMA result and 0.0 yields the scalar inserted into lane 0 of the first argument's source vector.
__m128 test_mm_maskz_fmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fmsub_round_ss
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask3 fmsub. Expected IR: the third call argument is lane 0 of the negated vector, but the select's fallback scalar is re-extracted from the un-negated vector, and the selected scalar is inserted into lane 0 of that same un-negated vector.
__m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Unmasked fnmadd. Expected IR: one operand vector is negated by an fsub from a -0.0 vector and its lane 0 becomes the second argument of the scalar vfmadd.f32 call; the result is inserted into lane 0 of the first argument's source vector.
__m128 test_mm_fnmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_fnmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0
return _mm_fnmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask fnmadd. Expected IR: the second call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f32 call, a select between the FMA result and the first call argument yields the scalar inserted into lane 0 of the first argument's source vector.
__m128 test_mm_mask_fnmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fnmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fnmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Maskz fnmadd. Expected IR: the second call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f32 call, a select between the FMA result and 0.0 yields the scalar inserted into lane 0 of the first argument's source vector.
__m128 test_mm_maskz_fnmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fnmadd_round_ss
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fnmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask3 fnmadd. Expected IR: the second call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f32 call, a select between the FMA result and the third call argument yields the scalar inserted into lane 0 of the third argument's source vector.
__m128 test_mm_mask3_fnmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fnmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Unmasked fnmsub. Expected IR: two operand vectors are negated by fsub from -0.0 and their lane 0 values become the second and third arguments of the scalar vfmadd.f32 call; the result is inserted into lane 0 of the first argument's source vector.
__m128 test_mm_fnmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_fnmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0
return _mm_fnmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask fnmsub. Expected IR: the second and third call arguments come from vectors negated by fsub from -0.0; after the scalar vfmadd.f32 call, a select between the FMA result and the first call argument yields the scalar inserted into lane 0 of the first argument's source vector.
__m128 test_mm_mask_fnmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fnmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fnmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Maskz fnmsub. Expected IR: the second and third call arguments come from vectors negated by fsub from -0.0; after the scalar vfmadd.f32 call, a select between the FMA result and 0.0 yields the scalar inserted into lane 0 of the first argument's source vector.
__m128 test_mm_maskz_fnmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fnmsub_round_ss
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fnmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask3 fnmsub. Expected IR: the second and third call arguments come from negated vectors, but the select's fallback scalar is re-extracted from the un-negated source of the second fsub, and the selected scalar is inserted into lane 0 of that same un-negated vector.
__m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Unmasked form. Expected IR: lane 0 of each <2 x double> operand is extracted, passed to @llvm.x86.avx512.vfmadd.f64 with rounding immediate i32 8, and the result is inserted into lane 0 of the vector that supplied the first call argument.
__m128d test_mm_fmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_fmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0
return _mm_fmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask form. Expected IR: the same scalar vfmadd.f64 call as the unmasked test, then the i8 mask is bitcast to <8 x i1>, element 0 is extracted, and a select between the FMA result and the first call argument yields the scalar inserted into lane 0 of that argument's source vector.
__m128d test_mm_mask_fmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Maskz form. Expected IR: scalar vfmadd.f64 call on lane 0 of the three operands, then the i8 mask is bitcast to <8 x i1>, element 0 is extracted, and a select between the FMA result and 0.0 yields the scalar inserted into lane 0 of the first call argument's source vector.
__m128d test_mm_maskz_fmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fmadd_round_sd
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask3 form. Expected IR: scalar vfmadd.f64 call on lane 0 of the three operands, then a select between the FMA result and the third call argument; the selected scalar is inserted into lane 0 of the third argument's source vector.
__m128d test_mm_mask3_fmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Unmasked fmsub. Expected IR: one operand vector is negated by an fsub from a -0.0 vector and its lane 0 becomes the third argument of the scalar vfmadd.f64 call; the result is inserted into lane 0 of the first argument's source vector.
__m128d test_mm_fmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_fmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0
return _mm_fmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask fmsub. Expected IR: the third call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f64 call, a select between the FMA result and the first call argument yields the scalar inserted into lane 0 of the first argument's source vector.
__m128d test_mm_mask_fmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Maskz fmsub. Expected IR: the third call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f64 call, a select between the FMA result and 0.0 yields the scalar inserted into lane 0 of the first argument's source vector.
__m128d test_mm_maskz_fmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fmsub_round_sd
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask3 fmsub. Expected IR: the third call argument is lane 0 of the negated vector, but the select's fallback scalar is re-extracted from the un-negated vector, and the selected scalar is inserted into lane 0 of that same un-negated vector.
__m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Unmasked fnmadd. Expected IR: one operand vector is negated by an fsub from a -0.0 vector and its lane 0 becomes the second argument of the scalar vfmadd.f64 call; the result is inserted into lane 0 of the first argument's source vector.
__m128d test_mm_fnmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_fnmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0
return _mm_fnmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask fnmadd. Expected IR: the second call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f64 call, a select between the FMA result and the first call argument yields the scalar inserted into lane 0 of the first argument's source vector.
__m128d test_mm_mask_fnmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fnmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fnmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Maskz fnmadd. Expected IR: the second call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f64 call, a select between the FMA result and 0.0 yields the scalar inserted into lane 0 of the first argument's source vector.
__m128d test_mm_maskz_fnmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fnmadd_round_sd
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fnmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask3 fnmadd. Expected IR: the second call argument comes from a vector negated by an fsub from -0.0; after the scalar vfmadd.f64 call, a select between the FMA result and the third call argument yields the scalar inserted into lane 0 of the third argument's source vector.
__m128d test_mm_mask3_fnmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fnmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Unmasked fnmsub. Expected IR: two operand vectors are negated by fsub from -0.0 and their lane 0 values become the second and third arguments of the scalar vfmadd.f64 call; the result is inserted into lane 0 of the first argument's source vector.
// The source-vector capture uses %.+ (not %.) so it matches SSA value names of any length, like every sibling test.
__m128d test_mm_fnmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_fnmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0
return _mm_fnmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask fnmsub. Expected IR: the second and third call arguments come from vectors negated by fsub from -0.0; after the scalar vfmadd.f64 call, a select between the FMA result and the first call argument yields the scalar inserted into lane 0 of the first argument's source vector.
// The source-vector capture uses %.+ (not %.) so it matches SSA value names of any length, like every sibling test.
__m128d test_mm_mask_fnmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fnmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fnmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Maskz fnmsub. Expected IR: the second and third call arguments come from vectors negated by fsub from -0.0; after the scalar vfmadd.f64 call, a select between the FMA result and 0.0 yields the scalar inserted into lane 0 of the first argument's source vector.
// The source-vector capture uses %.+ (not %.) so it matches SSA value names of any length, like every sibling test.
__m128d test_mm_maskz_fnmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fnmsub_round_sd
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fnmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
// Mask3 fnmsub. Expected IR: the second and third call arguments come from negated vectors, but the select's fallback scalar is re-extracted from the un-negated source of the second fsub, and the selected scalar is inserted into lane 0 of that same un-negated vector.
__m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}