define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_add_ss_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_add_ss_mask_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_add_sd_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_add_sd_mask_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_sub_ss_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_sub_ss_mask_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_sub_sd_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_sub_sd_mask_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_mul_ss_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mul_ss_mask_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_mul_sd_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_mul_sd_mask_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_div_ss_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_div_ss_mask_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_div_sd_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_div_sd_mask_round(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_max_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 4)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_max_ss_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_max_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 4)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
define <2 x double> @test_max_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_max_sd_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_min_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 4)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_min_ss_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: ret <4 x float> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_min_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 4)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
define <2 x double> @test_min_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_min_sd_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: ret <2 x double> [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; CHECK-LABEL: @test_mask3_vfmsub_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
-; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS]]
-; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0
-; CHECK-NEXT: ret <4 x float> [[TMP9]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP10]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
; CHECK-LABEL: @test_mask3_vfmsub_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS]]
-; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]]
-; CHECK-NEXT: ret float [[TMP8]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
+; CHECK-NEXT: ret float [[TMP9]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
; CHECK-LABEL: @test_mask3_vfmsub_sd(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS]]
-; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0
-; CHECK-NEXT: ret <2 x double> [[TMP9]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP10]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
; CHECK-LABEL: @test_mask3_vfmsub_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS]]
-; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]]
-; CHECK-NEXT: ret double [[TMP8]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
+; CHECK-NEXT: ret double [[TMP9]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfnmsub_ss(
-; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, [[DOTRHS]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
-; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS1]]
-; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0
-; CHECK-NEXT: ret <4 x float> [[TMP9]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[C]], float [[TMP10]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP11]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfnmsub_ss_0(
-; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, [[DOTRHS]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS1]]
-; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]]
-; CHECK-NEXT: ret float [[TMP8]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
+; CHECK-NEXT: ret float [[TMP10]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfnmsub_sd(
-; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = fsub double -0.000000e+00, [[DOTRHS]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS1]]
-; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0
-; CHECK-NEXT: ret <2 x double> [[TMP9]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[C]], double [[TMP10]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP11]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfnmsub_sd_0(
-; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = fsub double -0.000000e+00, [[DOTRHS]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS1]]
-; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]]
-; CHECK-NEXT: ret double [[TMP8]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
+; CHECK-NEXT: ret double [[TMP10]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a