[X86] Make lowering of intrinsics with rounding mode stricter so that only valid...

author Craig Topper <craig.topper@intel.com>
Sun, 10 Mar 2019 17:20:45 +0000 (17:20 +0000)
committer Craig Topper <craig.topper@intel.com>
Sun, 10 Mar 2019 17:20:45 +0000 (17:20 +0000)
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 62dd685b36078aaa539526b149a52e81c1f1cdc9..5bd4709343d514c39fa99858d29722fd7831129e 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -48,7 +48,8 @@ namespace X86 {
      TO_NEG_INF = 1,
      TO_POS_INF = 2,
      TO_ZERO = 3,
-    CUR_DIRECTION = 4
+    CUR_DIRECTION = 4,
+    NO_EXC = 8
    };
  
    /// The constants to describe instr prefixes if there are
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7d3bdaf9f57ce35596c28086f461124a17b85d19..0294ae1ea2de68f6c9e38933e16453718858cbb3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -21876,11 +21876,31 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
    // Helper to detect if the operand is CUR_DIRECTION rounding mode.
    auto isRoundModeCurDirection = [](SDValue Rnd) {
-    if (!isa<ConstantSDNode>(Rnd))
-      return false;
+    if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
+      return C->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION;
  
-    unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
-    return Round == X86::STATIC_ROUNDING::CUR_DIRECTION;
+    return false;
+  };
+  auto isRoundModeSAE = [](SDValue Rnd) {
+    if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
+      return C->getZExtValue() == X86::STATIC_ROUNDING::NO_EXC;
+
+    return false;
+  };
+  auto isRoundModeSAEToX = [](SDValue Rnd) {
+    if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
+      unsigned Round = C->getZExtValue();
+      if (Round & X86::STATIC_ROUNDING::NO_EXC) {
+        // Clear the NO_EXC bit and check remaining bits.
+        Round ^= X86::STATIC_ROUNDING::NO_EXC;
+        return Round == X86::STATIC_ROUNDING::TO_NEAREST_INT ||
+               Round == X86::STATIC_ROUNDING::TO_NEG_INF ||
+               Round == X86::STATIC_ROUNDING::TO_POS_INF ||
+               Round == X86::STATIC_ROUNDING::TO_ZERO;
+      }
+    }
+
+    return false;
    };
  
    SDLoc dl(Op);
@@ -21896,10 +21916,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
        if (IntrWithRoundingModeOpcode != 0) {
          SDValue Rnd = Op.getOperand(2);
-        if (!isRoundModeCurDirection(Rnd)) {
+        if (isRoundModeSAEToX(Rnd))
            return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
                               Op.getOperand(1), Rnd);
-        }
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
        }
        return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
      }
@@ -21912,10 +21933,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
        if (IntrWithRoundingModeOpcode != 0) {
          SDValue Rnd = Op.getOperand(3);
-        if (!isRoundModeCurDirection(Rnd)) {
+        if (isRoundModeSAEToX(Rnd))
            return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
                               Op.getOperand(1), Src2, Rnd);
-        }
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
        }
  
        return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
@@ -21936,11 +21958,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
        if (IntrWithRoundingModeOpcode != 0) {
          SDValue Rnd = Op.getOperand(4);
-        if (!isRoundModeCurDirection(Rnd)) {
+        if (isRoundModeSAEToX(Rnd))
            return DAG.getNode(IntrWithRoundingModeOpcode,
                               dl, Op.getValueType(),
                               Src1, Src2, Src3, Rnd);
-        }
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
        }
  
        return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
@@ -21977,12 +22000,13 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
        if (IntrWithRoundingModeOpcode != 0) {
          SDValue Rnd = Op.getOperand(4);
-        if (!isRoundModeCurDirection(Rnd)) {
+        if (isRoundModeSAEToX(Rnd))
            return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
                                        dl, Op.getValueType(),
                                        Src, Rnd),
                                        Mask, PassThru, Subtarget, DAG);
-        }
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
        }
        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
                                    Mask, PassThru, Subtarget, DAG);
@@ -22000,10 +22024,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        if (Op.getNumOperands() == (5U + HasRounding)) {
          if (HasRounding) {
            SDValue Rnd = Op.getOperand(5);
-          if (!isRoundModeCurDirection(Rnd))
+          if (isRoundModeSAEToX(Rnd))
              return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
                                                      dl, VT, Src1, Src2, Rnd),
                                          Mask, passThru, Subtarget, DAG);
+          if (!isRoundModeCurDirection(Rnd))
+            return SDValue();
          }
          return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
                                                  Src2),
@@ -22015,11 +22041,13 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        SDValue RoundingMode = Op.getOperand(5);
        if (HasRounding) {
          SDValue Sae = Op.getOperand(6);
-        if (!isRoundModeCurDirection(Sae))
+        if (isRoundModeSAE(Sae))
            return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
                                                    dl, VT, Src1, Src2,
                                                    RoundingMode, Sae),
                                        Mask, passThru, Subtarget, DAG);
+        if (!isRoundModeCurDirection(Sae))
+          return SDValue();
        }
        return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
                                                Src2, RoundingMode),
@@ -22048,12 +22076,13 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
        if (IntrWithRoundingModeOpcode != 0) {
          SDValue Rnd = Op.getOperand(5);
-        if (!isRoundModeCurDirection(Rnd)) {
+        if (isRoundModeSAEToX(Rnd))
            return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
                                        dl, Op.getValueType(),
                                        Src1, Src2, Rnd),
                                        Mask, PassThru, Subtarget, DAG);
-        }
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
        }
        // TODO: Intrinsics should have fast-math-flags to propagate.
        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2),
@@ -22087,10 +22116,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
        if (IntrWithRoundingModeOpcode != 0) {
          SDValue Rnd = Op.getOperand(6);
-        if (!isRoundModeCurDirection(Rnd))
+        if (isRoundModeSAEToX(Rnd))
            return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
                                                    dl, VT, Src1, Src2, Src3, Rnd),
                                        Mask, PassThru, Subtarget, DAG);
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
        }
        return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
                                                Src2, Src3),
@@ -22109,12 +22140,13 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
        if (IntrWithRoundingModeOpcode != 0) {
          SDValue Rnd = Op.getOperand(6);
-        if (!isRoundModeCurDirection(Rnd)) {
+        if (isRoundModeSAEToX(Rnd))
            return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
                                        dl, Op.getValueType(),
                                        Src1, Src2, Src3, Rnd),
                                        Mask, PassThru, Subtarget, DAG);
-        }
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
        }
        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
                                                Src1, Src2, Src3),
@@ -22167,17 +22199,16 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        // First, we check if the intrinsic may have non-default rounding mode,
        // (IntrData->Opc1 != 0), then we check the rounding mode operand.
        if (IntrData->Opc1 != 0) {
-        SDValue Rnd = Op.getOperand(4);
-        if (!isRoundModeCurDirection(Rnd))
-          Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
-                            Op.getOperand(2), CC, Rnd);
+        SDValue Sae = Op.getOperand(4);
+        if (isRoundModeSAE(Sae))
+          return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
+                             Op.getOperand(2), CC, Sae);
+        if (!isRoundModeCurDirection(Sae))
+          return SDValue();
        }
        //default rounding mode
-      if (!Cmp.getNode())
-        Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+      return DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
                            Op.getOperand(2), CC);
-
-      return Cmp;
      }
      case CMP_MASK_SCALAR_CC: {
        SDValue Src1 = Op.getOperand(1);
@@ -22187,9 +22218,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  
        SDValue Cmp;
        if (IntrData->Opc1 != 0) {
-        SDValue Rnd = Op.getOperand(5);
-        if (!isRoundModeCurDirection(Rnd))
-          Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd);
+        SDValue Sae = Op.getOperand(5);
+        if (isRoundModeSAE(Sae))
+          Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae);
+        else if (!isRoundModeCurDirection(Sae))
+          return SDValue();
        }
        //default rounding mode
        if (!Cmp.getNode())
@@ -22252,9 +22285,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        if (isRoundModeCurDirection(Sae))
          FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
                             DAG.getConstant(CondVal, dl, MVT::i8));
-      else
+      else if (isRoundModeSAE(Sae))
          FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS,
                             DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+      else
+        return SDValue();
        // Need to fill with zeros to ensure the bitcast will produce zeroes
        // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
        SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
diff --git a/test/CodeGen/X86/avx512-fma-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512-fma-intrinsics-upgrade.ll
index 1089500e390f379e30699d5458a48fd85ffad030..552bbe99d600687a302c1bc6ebc5269e064fc415 100644
--- a/test/CodeGen/X86/avx512-fma-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512-fma-intrinsics-upgrade.ll
@@ -227,7 +227,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 8) nounwind
    ret <16 x float> %res
  }
  
@@ -237,7 +237,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 9) nounwind
    ret <16 x float> %res
  }
  
@@ -247,7 +247,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 10) nounwind
    ret <16 x float> %res
  }
  
@@ -257,7 +257,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 11) nounwind
    ret <16 x float> %res
  }
  
@@ -276,7 +276,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 8) nounwind
    ret <16 x float> %res
  }
  
@@ -285,7 +285,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 9) nounwind
    ret <16 x float> %res
  }
  
@@ -294,7 +294,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 10) nounwind
    ret <16 x float> %res
  }
  
@@ -303,7 +303,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 11) nounwind
    ret <16 x float> %res
  }
  
@@ -348,7 +348,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 8) nounwind
    ret <8 x double> %res
  }
  
@@ -358,7 +358,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 9) nounwind
    ret <8 x double> %res
  }
  
@@ -368,7 +368,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 10) nounwind
    ret <8 x double> %res
  }
  
@@ -378,7 +378,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 11) nounwind
    ret <8 x double> %res
  }
  
@@ -397,7 +397,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 8) nounwind
    ret <8 x double> %res
  }
  
@@ -406,7 +406,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 9) nounwind
    ret <8 x double> %res
  }
  
@@ -415,7 +415,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 10) nounwind
    ret <8 x double> %res
  }
  
@@ -424,7 +424,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 11) nounwind
    ret <8 x double> %res
  }
  
@@ -514,7 +514,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 8) nounwind
    ret <8 x double> %res
  }
  
@@ -524,7 +524,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 9) nounwind
    ret <8 x double> %res
  }
  
@@ -534,7 +534,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 10) nounwind
    ret <8 x double> %res
  }
  
@@ -544,7 +544,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8
  ; CHECK-NEXT:    kmovw %edi, %k1
  ; CHECK-NEXT:    vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 11) nounwind
    ret <8 x double> %res
  }
  
@@ -563,7 +563,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 8) nounwind
    ret <8 x double> %res
  }
  
@@ -572,7 +572,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 9) nounwind
    ret <8 x double> %res
  }
  
@@ -581,7 +581,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 10) nounwind
    ret <8 x double> %res
  }
  
@@ -590,7 +590,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 11) nounwind
    ret <8 x double> %res
  }
  
diff --git a/test/CodeGen/X86/avx512-fma-intrinsics.ll b/test/CodeGen/X86/avx512-fma-intrinsics.ll
index 7b3f685d7b18ea2ac751f3fd40bc89187abb4a90..99438064f85ce82d53611cf85ceca88b69bc3895 100644
--- a/test/CodeGen/X86/avx512-fma-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-fma-intrinsics.ll
@@ -405,7 +405,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 0) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
    %bc = bitcast i16 %mask to <16 x i1>
    %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
    ret <16 x float> %sel
@@ -423,7 +423,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x39,0x98,0xc1]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 1) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
    %bc = bitcast i16 %mask to <16 x i1>
    %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
    ret <16 x float> %sel
@@ -441,7 +441,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x59,0x98,0xc1]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 2) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
    %bc = bitcast i16 %mask to <16 x i1>
    %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
    ret <16 x float> %sel
@@ -459,7 +459,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x79,0x98,0xc1]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 3) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
    %bc = bitcast i16 %mask to <16 x i1>
    %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
    ret <16 x float> %sel
@@ -490,7 +490,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 0) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
    ret <16 x float> %res
  }
  
@@ -499,7 +499,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 1) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
    ret <16 x float> %res
  }
  
@@ -508,7 +508,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 2) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
    ret <16 x float> %res
  }
  
@@ -517,7 +517,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 3) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
    ret <16 x float> %res
  }
  
@@ -591,7 +591,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x
  ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
    %bc = bitcast i8 %mask to <8 x i1>
    %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
    ret <8 x double> %sel
@@ -610,7 +610,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x
  ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x98,0xc1]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
    %bc = bitcast i8 %mask to <8 x i1>
    %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
    ret <8 x double> %sel
@@ -629,7 +629,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x
  ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x98,0xc1]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
    %bc = bitcast i8 %mask to <8 x i1>
    %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
    ret <8 x double> %sel
@@ -648,7 +648,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x
  ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x98,0xc1]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
    %bc = bitcast i8 %mask to <8 x i1>
    %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
    ret <8 x double> %sel
@@ -680,7 +680,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
    ret <8 x double> %res
  }
  
@@ -689,7 +689,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
    ret <8 x double> %res
  }
  
@@ -698,7 +698,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
    ret <8 x double> %res
  }
  
@@ -707,7 +707,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
    ret <8 x double> %res
  }
  
@@ -863,7 +863,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8
  ; X64-NEXT:    retq # encoding: [0xc3]
    %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
    %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
-  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 0)
+  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
    %4 = bitcast i8 %mask to <8 x i1>
    %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
    ret <8 x double> %5
@@ -884,7 +884,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8
  ; X64-NEXT:    retq # encoding: [0xc3]
    %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
    %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
-  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 1)
+  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
    %4 = bitcast i8 %mask to <8 x i1>
    %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
    ret <8 x double> %5
@@ -905,7 +905,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8
  ; X64-NEXT:    retq # encoding: [0xc3]
    %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
    %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
-  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 2)
+  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
    %4 = bitcast i8 %mask to <8 x i1>
    %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
    ret <8 x double> %5
@@ -926,7 +926,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8
  ; X64-NEXT:    retq # encoding: [0xc3]
    %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
    %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
-  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 3)
+  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
    %4 = bitcast i8 %mask to <8 x i1>
    %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
    ret <8 x double> %5
@@ -962,7 +962,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
    %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
-  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 0)
+  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
    ret <8 x double> %3
  }
  
@@ -973,7 +973,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
    %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
-  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 1)
+  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
    ret <8 x double> %3
  }
  
@@ -984,7 +984,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
    %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
-  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 2)
+  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
    ret <8 x double> %3
  }
  
@@ -995,7 +995,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8
  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
    %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
-  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 3)
+  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
    ret <8 x double> %3
  }
  
diff --git a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll

index 4124aa65aad4edac5513a836217b4bd4906c1448..9d2691cfd1cce56d2f60b14c74213c46b2ecaf72 100644 (file)
--- a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -7194,7 +7194,7 @@ define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 -1, i32 0)
+                    <16 x float> zeroinitializer, i16 -1, i32 8)
    ret <16 x float> %res
  }
  
@@ -7204,7 +7204,7 @@ define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 -1, i32 1)
+                    <16 x float> zeroinitializer, i16 -1, i32 9)
    ret <16 x float> %res
  }
  
@@ -7214,7 +7214,7 @@ define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 -1, i32 2)
+                    <16 x float> zeroinitializer, i16 -1, i32 10)
    ret <16 x float> %res
  }
  
@@ -7224,7 +7224,7 @@ define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 -1, i32 3)
+                    <16 x float> zeroinitializer, i16 -1, i32 11)
    ret <16 x float> %res
  }
  
@@ -7234,7 +7234,7 @@ define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 -1, i32 0)
+                    <16 x float> zeroinitializer, i16 -1, i32 8)
    ret <16 x float> %res
  }
  
@@ -7244,7 +7244,7 @@ define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 -1, i32 1)
+                    <16 x float> zeroinitializer, i16 -1, i32 9)
    ret <16 x float> %res
  }
  
@@ -7254,7 +7254,7 @@ define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 -1, i32 2)
+                    <16 x float> zeroinitializer, i16 -1, i32 10)
    ret <16 x float> %res
  }
  
@@ -7264,7 +7264,7 @@ define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 -1, i32 3)
+                    <16 x float> zeroinitializer, i16 -1, i32 11)
    ret <16 x float> %res
  }
  
@@ -7282,7 +7282,7 @@ define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16
  ; X64-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 %mask, i32 0)
+                    <16 x float> zeroinitializer, i16 %mask, i32 8)
    ret <16 x float> %res
  }
  
@@ -7299,7 +7299,7 @@ define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16
  ; X64-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 %mask, i32 1)
+                    <16 x float> zeroinitializer, i16 %mask, i32 9)
    ret <16 x float> %res
  }
  
@@ -7316,7 +7316,7 @@ define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16
  ; X64-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 %mask, i32 2)
+                    <16 x float> zeroinitializer, i16 %mask, i32 10)
    ret <16 x float> %res
  }
  
@@ -7333,7 +7333,7 @@ define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16
  ; X64-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> zeroinitializer, i16 %mask, i32 3)
+                    <16 x float> zeroinitializer, i16 %mask, i32 11)
    ret <16 x float> %res
  }
  
@@ -7353,7 +7353,7 @@ define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float>
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> %passthru, i16 %mask, i32 0)
+                    <16 x float> %passthru, i16 %mask, i32 8)
    ret <16 x float> %res
  }
  
@@ -7372,7 +7372,7 @@ define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float>
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> %passthru, i16 %mask, i32 1)
+                    <16 x float> %passthru, i16 %mask, i32 9)
    ret <16 x float> %res
  }
  
@@ -7391,7 +7391,7 @@ define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float>
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> %passthru, i16 %mask, i32 2)
+                    <16 x float> %passthru, i16 %mask, i32 10)
    ret <16 x float> %res
  }
  
@@ -7410,7 +7410,7 @@ define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float>
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
-                    <16 x float> %passthru, i16 %mask, i32 3)
+                    <16 x float> %passthru, i16 %mask, i32 11)
    ret <16 x float> %res
  }
  
@@ -7429,7 +7429,7 @@ define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8
  ; X64-NEXT:    vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
-                    <8 x double> zeroinitializer, i8 %mask, i32 0)
+                    <8 x double> zeroinitializer, i8 %mask, i32 8)
    ret <8 x double> %res
  }
  
@@ -7447,7 +7447,7 @@ define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8
  ; X64-NEXT:    vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
-                    <8 x double> zeroinitializer, i8 %mask, i32 1)
+                    <8 x double> zeroinitializer, i8 %mask, i32 9)
    ret <8 x double> %res
  }
  
@@ -7465,7 +7465,7 @@ define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8
  ; X64-NEXT:    vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
-                    <8 x double> zeroinitializer, i8 %mask, i32 2)
+                    <8 x double> zeroinitializer, i8 %mask, i32 10)
    ret <8 x double> %res
  }
  
@@ -7483,7 +7483,7 @@ define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8
  ; X64-NEXT:    vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
-                    <8 x double> zeroinitializer, i8 %mask, i32 3)
+                    <8 x double> zeroinitializer, i8 %mask, i32 11)
    ret <8 x double> %res
  }
  
@@ -7499,7 +7499,7 @@ define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -7514,7 +7514,7 @@ define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -7529,7 +7529,7 @@ define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10)
    ret <16 x float> %res
  }
  
@@ -7545,7 +7545,7 @@ define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11)
    ret <16 x float> %res
  }
  
@@ -7580,7 +7580,7 @@ define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
@@ -7597,7 +7597,7 @@ define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
@@ -7614,7 +7614,7 @@ define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
    ret <16 x float> %res
  }
  
@@ -7632,7 +7632,7 @@ define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
    ret <16 x float> %res
  }
  
@@ -7661,7 +7661,7 @@ define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -7669,7 +7669,7 @@ define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -7677,7 +7677,7 @@ define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x58,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
    ret <16 x float> %res
  }
  
@@ -7686,7 +7686,7 @@ define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x58,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
+  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
    ret <16 x float> %res
  }
  
@@ -7714,7 +7714,7 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
@@ -7731,7 +7731,7 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
+  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
@@ -7748,7 +7748,7 @@ define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
+  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
    ret <16 x float> %res
  }
  
@@ -7766,7 +7766,7 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
+  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
    ret <16 x float> %res
  }
  
@@ -7794,7 +7794,7 @@ define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -7802,7 +7802,7 @@ define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
+  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -7810,7 +7810,7 @@ define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
+  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
    ret <16 x float> %res
  }
  
@@ -7819,7 +7819,7 @@ define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
+  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
    ret <16 x float> %res
  }
  
@@ -7844,7 +7844,7 @@ define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -7859,7 +7859,7 @@ define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -7874,7 +7874,7 @@ define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10)
    ret <16 x float> %res
  }
  
@@ -7890,7 +7890,7 @@ define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16
  ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; X64-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11)
    ret <16 x float> %res
  }
  
@@ -7925,7 +7925,7 @@ define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
@@ -7942,7 +7942,7 @@ define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
@@ -7959,7 +7959,7 @@ define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
    ret <16 x float> %res
  }
  
@@ -7977,7 +7977,7 @@ define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x
  ; X64-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
  ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
  ; X64-NEXT:    retq ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
    ret <16 x float> %res
  }
  
@@ -8006,7 +8006,7 @@ define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5e,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -8014,7 +8014,7 @@ define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5e,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
    ret <16 x float> %res
  }
  define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
@@ -8022,7 +8022,7 @@ define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5e,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
    ret <16 x float> %res
  }
  
@@ -8031,7 +8031,7 @@ define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5e,0xc1]
  ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
+  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
    ret <16 x float> %res
  }
  
@@ -9066,8 +9066,8 @@ define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x do
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
    %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
-  %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
-  %res3 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+  %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+  %res3 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
    %res4 = fadd <2 x double> %res, %res1
    %res5 = fadd <2 x double> %res2, %res3
    %res6 = fadd <2 x double> %res4, %res5
@@ -9113,8 +9113,8 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x floa
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
    %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
-  %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
-  %res3 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+  %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+  %res3 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
    %res4 = fadd <4 x float> %res, %res1
    %res5 = fadd <4 x float> %res2, %res3
    %res6 = fadd <4 x float> %res4, %res5
@@ -9145,7 +9145,7 @@ define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x d
  ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
-  %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+  %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
    %res2 = fadd <2 x double> %res, %res1
    ret <2 x double> %res2
  }
@@ -9168,7 +9168,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x flo
  ; X64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
-  %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+  %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
    %res2 = fadd <4 x float> %res, %res1
    ret <4 x float> %res
  }
@@ -9211,8 +9211,8 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x d
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
    %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
-  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
-  %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+  %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
    %res4 = fadd <2 x double> %res, %res1
    %res5 = fadd <2 x double> %res2, %res3
    %res6 = fadd <2 x double> %res4, %res5
@@ -9258,8 +9258,8 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x flo
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
    %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
-  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
-  %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+  %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
    %res4 = fadd <4 x float> %res, %res1
    %res5 = fadd <4 x float> %res2, %res3
    %res6 = fadd <4 x float> %res4, %res5
@@ -9481,8 +9481,8 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x d
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
    %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
-  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
-  %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+  %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
    %res4 = fadd <2 x double> %res, %res1
    %res5 = fadd <2 x double> %res2, %res3
    %res6 = fadd <2 x double> %res4, %res5
@@ -9528,8 +9528,8 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x flo
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
    %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
-  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
-  %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+  %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
    %res4 = fadd <4 x float> %res, %res1
    %res5 = fadd <4 x float> %res2, %res3
    %res6 = fadd <4 x float> %res4, %res5
@@ -9575,8 +9575,8 @@ define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
    %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
-  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
-  %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+  %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
    %res4 = fadd <2 x double> %res, %res1
    %res5 = fadd <2 x double> %res2, %res3
    %res6 = fadd <2 x double> %res4, %res5
@@ -9622,8 +9622,8 @@ define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x fl
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
    %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
-  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
-  %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+  %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
    %res4 = fadd <4 x float> %res, %res1
    %res5 = fadd <4 x float> %res2, %res3
    %res6 = fadd <4 x float> %res4, %res5
@@ -9740,7 +9740,7 @@ define <16 x float> @test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16
  ; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
-  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
    %res2 = fadd <16 x float> %res, %res1
    ret <16 x float> %res2
  }
@@ -9764,7 +9764,7 @@ define <16 x float> @test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16
  ; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
-  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
    %res2 = fadd <16 x float> %res, %res1
    ret <16 x float> %res2
  }
diff --git a/test/CodeGen/X86/avx512-intrinsics-x86_64.ll b/test/CodeGen/X86/avx512-intrinsics-x86_64.ll

index 196b3e243436fd1695f0ef48beed2a328079ad21..a05fabe13e38e86fb6f43a0aba4b666e147f558f 100644 (file)
--- a/test/CodeGen/X86/avx512-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-x86_64.ll
@@ -125,8 +125,8 @@ define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
  ; CHECK-NEXT:    retq
  
    %res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4)
-  %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 3)
-  %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 1)
+  %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 11)
+  %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 9)
    %res3 = add i64 %res, %res1
    %res4 = add i64 %res3, %res2
    ret i64 %res4
@@ -144,8 +144,8 @@ define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) {
  ; CHECK-NEXT:    retq
  
    %res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4)
-  %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 3)
-  %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 1)
+  %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 11)
+  %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 9)
    %res3 = add i64 %res, %res1
    %res4 = add i64 %res3, %res2
    ret i64 %res4
@@ -163,8 +163,8 @@ define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) {
  ; CHECK-NEXT:    retq
  
    %res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4)
-  %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 3)
-  %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 1)
+  %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 11)
+  %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 9)
    %res3 = add i64 %res, %res1
    %res4 = add i64 %res3, %res2
    ret i64 %res4
@@ -182,8 +182,8 @@ define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) {
  ; CHECK-NEXT:    retq
  
    %res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4)
-  %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 3)
-  %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 1)
+  %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 11)
+  %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 9)
    %res3 = add i64 %res, %res1
    %res4 = add i64 %res3, %res2
    ret i64 %res4
@@ -195,7 +195,7 @@ define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
  ; CHECK-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
+  %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 11) ; <<<2 x double>> [#uses=1]
    ret <2 x double> %res
  }
  declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
@@ -205,7 +205,7 @@ define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
  ; CHECK-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 11) ; <<<4 x float>> [#uses=1]
    ret <4 x float> %res
  }
  declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
@@ -215,7 +215,7 @@ define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b) {
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
  ; CHECK-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 9) ; <<<4 x float>> [#uses=1]
    ret <4 x float> %res
  }
  
@@ -234,7 +234,7 @@ define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b) {
  ; CHECK:       ## %bb.0:
  ; CHECK-NEXT:    vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
  ; CHECK-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
+  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 9) ; <<<2 x double>> [#uses=1]
    ret <2 x double> %res
  }
  
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll

index 3e3da21d56999d9d00a8cebd05f400f85aa595f6..2c88ac500c4121fa6439caf7a0c5adeae4d56b92 100644 (file)
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -809,9 +809,9 @@ define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %
  ; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0
  ; X86-NEXT:    retl
    %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
-  %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
-  %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
-  %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
+  %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 10)
+  %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 11)
  
    %res.1 = fadd <4 x float> %res0, %res1
    %res.2 = fadd <4 x float> %res2, %res3
@@ -849,9 +849,9 @@ define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
  ; X86-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
  ; X86-NEXT:    retl
    %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
-  %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
-  %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
-  %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
+  %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 10)
+  %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 11)
  
    %res.1 = fadd <2 x double> %res0, %res1
    %res.2 = fadd <2 x double> %res2, %res3
@@ -942,8 +942,8 @@ define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) {
  ; CHECK-NEXT:    ret{{[l|q]}}
  
    %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
-  %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 3)
-  %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 1)
+  %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 11)
+  %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 9)
    %res3 = add i32 %res, %res1
    %res4 = add i32 %res3, %res2
    ret i32 %res4
@@ -961,8 +961,8 @@ define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) {
  ; CHECK-NEXT:    ret{{[l|q]}}
  
    %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
-  %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 3)
-  %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 1)
+  %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 11)
+  %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 9)
    %res3 = add i32 %res, %res1
    %res4 = add i32 %res3, %res2
    ret i32 %res4
@@ -980,8 +980,8 @@ define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) {
  ; CHECK-NEXT:    ret{{[l|q]}}
  
    %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
-  %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 3)
-  %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 1)
+  %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 11)
+  %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 9)
    %res3 = add i32 %res, %res1
    %res4 = add i32 %res3, %res2
    ret i32 %res4
@@ -999,8 +999,8 @@ define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) {
  ; CHECK-NEXT:    ret{{[l|q]}}
  
    %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
-  %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 3)
-  %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 1)
+  %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 11)
+  %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 9)
    %res3 = add i32 %res, %res1
    %res4 = add i32 %res3, %res2
    ret i32 %res4
@@ -1192,7 +1192,7 @@ define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    ret <16 x float> %1
  }
  
@@ -1201,7 +1201,7 @@ define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    ret <16 x float> %1
  }
  
@@ -1210,7 +1210,7 @@ define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    ret <16 x float> %1
  }
  
@@ -1219,7 +1219,7 @@ define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    ret <16 x float> %1
  }
  
@@ -1228,7 +1228,7 @@ define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    ret <16 x float> %1
  }
  
@@ -1237,7 +1237,7 @@ define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    ret <16 x float> %1
  }
  
@@ -1246,7 +1246,7 @@ define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    ret <16 x float> %1
  }
  
@@ -1255,7 +1255,7 @@ define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    ret <16 x float> %1
  }
  
@@ -1272,7 +1272,7 @@ define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1290,7 +1290,7 @@ define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1308,7 +1308,7 @@ define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1326,7 +1326,7 @@ define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1347,7 +1347,7 @@ define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float>
  ; X86-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
    ret <16 x float> %3
@@ -1367,7 +1367,7 @@ define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float>
  ; X86-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
    ret <16 x float> %3
@@ -1387,7 +1387,7 @@ define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float>
  ; X86-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
    ret <16 x float> %3
@@ -1407,7 +1407,7 @@ define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float>
  ; X86-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
    ret <16 x float> %3
@@ -1427,7 +1427,7 @@ define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8
  ; X86-NEXT:    kmovw %eax, %k1
  ; X86-NEXT:    vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0)
+  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 8)
    %2 = bitcast i8 %mask to <8 x i1>
    %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
    ret <8 x double> %3
@@ -1446,7 +1446,7 @@ define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8
  ; X86-NEXT:    kmovw %eax, %k1
  ; X86-NEXT:    vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 1)
+  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 9)
    %2 = bitcast i8 %mask to <8 x i1>
    %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
    ret <8 x double> %3
@@ -1465,7 +1465,7 @@ define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8
  ; X86-NEXT:    kmovw %eax, %k1
  ; X86-NEXT:    vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 2)
+  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 10)
    %2 = bitcast i8 %mask to <8 x i1>
    %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
    ret <8 x double> %3
@@ -1484,7 +1484,7 @@ define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8
  ; X86-NEXT:    kmovw %eax, %k1
  ; X86-NEXT:    vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 3)
+  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 11)
    %2 = bitcast i8 %mask to <8 x i1>
    %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
    ret <8 x double> %3
@@ -1502,7 +1502,7 @@ define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1520,7 +1520,7 @@ define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1538,7 +1538,7 @@ define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1556,7 +1556,7 @@ define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1594,7 +1594,7 @@ define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -1614,7 +1614,7 @@ define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -1634,7 +1634,7 @@ define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -1654,7 +1654,7 @@ define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -1685,7 +1685,7 @@ define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    ret <16 x float> %1
  }
  
@@ -1694,7 +1694,7 @@ define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    ret <16 x float> %1
  }
  
@@ -1703,7 +1703,7 @@ define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    ret <16 x float> %1
  }
  
@@ -1712,7 +1712,7 @@ define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    ret <16 x float> %1
  }
  
@@ -1740,7 +1740,7 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -1760,7 +1760,7 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -1780,7 +1780,7 @@ define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -1800,7 +1800,7 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -1831,7 +1831,7 @@ define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    ret <16 x float> %1
  }
  
@@ -1840,7 +1840,7 @@ define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    ret <16 x float> %1
  }
  
@@ -1849,7 +1849,7 @@ define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    ret <16 x float> %1
  }
  
@@ -1858,7 +1858,7 @@ define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    ret <16 x float> %1
  }
  
@@ -1883,7 +1883,7 @@ define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1901,7 +1901,7 @@ define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1919,7 +1919,7 @@ define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1937,7 +1937,7 @@ define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16
  ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
  ; X86-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
    ret <16 x float> %3
@@ -1975,7 +1975,7 @@ define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -1995,7 +1995,7 @@ define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -2015,7 +2015,7 @@ define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -2035,7 +2035,7 @@ define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x
  ; X86-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  ; X86-NEXT:    vmovaps %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    %2 = bitcast i16 %mask to <16 x i1>
    %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
    ret <16 x float> %3
@@ -2066,7 +2066,7 @@ define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
    ret <16 x float> %1
  }
  
@@ -2075,7 +2075,7 @@ define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
    ret <16 x float> %1
  }
  
@@ -2084,7 +2084,7 @@ define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
    ret <16 x float> %1
  }
  
@@ -2093,7 +2093,7 @@ define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x floa
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
    ret <16 x float> %1
  }
  
@@ -2314,7 +2314,7 @@ define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x f
  ; X86-NEXT:    vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
  ; X86-NEXT:    vmovaps %xmm2, %xmm0
  ; X86-NEXT:    retl
-  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
+  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
    ret <4 x float> %res
  }
  
@@ -2333,7 +2333,7 @@ define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x f
  ; X86-NEXT:    vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
  ; X86-NEXT:    vmovaps %xmm2, %xmm0
  ; X86-NEXT:    retl
-  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
+  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
    ret <4 x float> %res
  }
  
@@ -2352,7 +2352,7 @@ define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x f
  ; X86-NEXT:    vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
  ; X86-NEXT:    vmovaps %xmm2, %xmm0
  ; X86-NEXT:    retl
-  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
+  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 10)
    ret <4 x float> %res
  }
  
@@ -2371,7 +2371,7 @@ define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x f
  ; X86-NEXT:    vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
  ; X86-NEXT:    vmovaps %xmm2, %xmm0
  ; X86-NEXT:    retl
-  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
+  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 11)
    ret <4 x float> %res
  }
  
@@ -2407,7 +2407,7 @@ define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %m
  ; X86-NEXT:    kmovw %eax, %k1
  ; X86-NEXT:    vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
+  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
    ret <4 x float> %res
  }
  
@@ -2416,7 +2416,7 @@ define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
+  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
    ret <4 x float> %res
  }
  
@@ -2485,7 +2485,7 @@ define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2
  ; X86-NEXT:    vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
  ; X86-NEXT:    vmovapd %xmm2, %xmm0
  ; X86-NEXT:    retl
-  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
+  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
    ret <2 x double> %res
  }
  
@@ -2504,7 +2504,7 @@ define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2
  ; X86-NEXT:    vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
  ; X86-NEXT:    vmovapd %xmm2, %xmm0
  ; X86-NEXT:    retl
-  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
+  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
    ret <2 x double> %res
  }
  
@@ -2523,7 +2523,7 @@ define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2
  ; X86-NEXT:    vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
  ; X86-NEXT:    vmovapd %xmm2, %xmm0
  ; X86-NEXT:    retl
-  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
+  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 10)
    ret <2 x double> %res
  }
  
@@ -2542,7 +2542,7 @@ define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2
  ; X86-NEXT:    vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
  ; X86-NEXT:    vmovapd %xmm2, %xmm0
  ; X86-NEXT:    retl
-  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
+  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 11)
    ret <2 x double> %res
  }
  
@@ -2578,7 +2578,7 @@ define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8
  ; X86-NEXT:    kmovw %eax, %k1
  ; X86-NEXT:    vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
  ; X86-NEXT:    retl
-  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
+  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
    ret <2 x double> %res
  }
  
@@ -2587,7 +2587,7 @@ define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
  ; CHECK-NEXT:    ret{{[l|q]}}
-  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
+  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
    ret <2 x double> %res
  }
  
@@ -2921,7 +2921,7 @@ define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  ; X86-NEXT:    vcvtsi2ssl %eax, {rz-sae}, %xmm0, %xmm0
  ; X86-NEXT:    retl
-  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 11) ; <<<4 x float>> [#uses=1]
    ret <4 x float> %res
  }
  declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
@@ -2937,7 +2937,7 @@ define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  ; X86-NEXT:    vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
  ; X86-NEXT:    retl
-  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1]
    ret <4 x float> %res
  }
  
@@ -2955,7 +2955,7 @@ define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32*
  ; X86-NEXT:    vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
  ; X86-NEXT:    retl
    %b = load i32, i32* %ptr
-  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1]
    ret <4 x float> %res
  }
  
@@ -3266,8 +3266,8 @@ define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8
  ; X86-NEXT:    vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0
  ; X86-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
-  %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 11)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 8)
    %res2 = fadd <8 x double> %res, %res1
    ret <8 x double> %res2
  }
@@ -3289,8 +3289,8 @@ define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16
  ; X86-NEXT:    vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0
  ; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0
  ; X86-NEXT:    retl
-  %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
-  %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 10)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 8)
    %res2 = fadd <16 x float> %res, %res1
    ret <16 x float> %res2
  }
@@ -4164,7 +4164,7 @@ define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x
    %cvt = sitofp <16 x i32> %x0 to <16 x float>
    %1 = bitcast i16 %x2 to <16 x i1>
    %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
-  %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 0)
+  %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
    %res2 = fadd <16 x float> %2, %3
    ret <16 x float> %res2
  }
@@ -4189,7 +4189,7 @@ define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i
  ; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
  ; X86-NEXT:    retl
    %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
-  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
    %res2 = add <8 x i32> %res, %res1
    ret <8 x i32> %res2
  }
@@ -4214,7 +4214,7 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x
  ; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0
  ; X86-NEXT:    retl
    %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
-  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 10)
    %res2 = fadd <8 x float> %res, %res1
    ret <8 x float> %res2
  }
@@ -4238,8 +4238,8 @@ define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x
  ; X86-NEXT:    vcvtpd2udq {rn-sae}, %zmm0, %ymm0
  ; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
  ; X86-NEXT:    retl
-  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
-  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
+  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 10)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
    %res2 = add <8 x i32> %res, %res1
    ret <8 x i32> %res2
  }
@@ -4262,8 +4262,8 @@ define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x
  ; X86-NEXT:    vcvtps2dq {rn-sae}, %zmm0, %zmm0
  ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
  ; X86-NEXT:    retl
-  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
-  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
+  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
    %res2 = add <16 x i32> %res, %res1
    ret <16 x i32> %res2
  }
@@ -4311,8 +4311,8 @@ define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16
  ; X86-NEXT:    vcvtps2udq {rn-sae}, %zmm0, %zmm0
  ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
  ; X86-NEXT:    retl
-  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
-  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
+  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
    %res2 = add <16 x i32> %res, %res1
    ret <16 x i32> %res2
  }
@@ -4363,7 +4363,7 @@ define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16
    %cvt = uitofp <16 x i32> %x0 to <16 x float>
    %1 = bitcast i16 %x2 to <16 x i1>
    %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
-  %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 0)
+  %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
    %res2 = fadd <16 x float> %2, %3
    ret <16 x float> %res2
  }
@@ -4990,7 +4990,7 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x
  ; X86-NEXT:    vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
  ; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0
  ; X86-NEXT:    retl
-  %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
+  %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 11)
    %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
    %res2 = fadd <4 x float> %res, %res1
    ret <4 x float> %res2
@@ -5712,12 +5712,12 @@ define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x do
    %14 = extractelement <2 x double> %x0, i64 0
    %15 = extractelement <2 x double> %x1, i64 0
    %16 = extractelement <2 x double> %x2, i64 0
-  %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 3)
+  %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 11)
    %18 = insertelement <2 x double> %x0, double %17, i64 0
    %19 = extractelement <2 x double> %x0, i64 0
    %20 = extractelement <2 x double> %x1, i64 0
    %21 = extractelement <2 x double> %x2, i64 0
-  %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 3)
+  %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 11)
    %23 = bitcast i8 %x3 to <8 x i1>
    %24 = extractelement <8 x i1> %23, i64 0
    %25 = select i1 %24, double %22, double %19
@@ -5775,12 +5775,12 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x floa
    %14 = extractelement <4 x float> %x0, i64 0
    %15 = extractelement <4 x float> %x1, i64 0
    %16 = extractelement <4 x float> %x2, i64 0
-  %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 3)
+  %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 11)
    %18 = insertelement <4 x float> %x0, float %17, i64 0
    %19 = extractelement <4 x float> %x0, i64 0
    %20 = extractelement <4 x float> %x1, i64 0
    %21 = extractelement <4 x float> %x2, i64 0
-  %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 3)
+  %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 11)
    %23 = bitcast i8 %x3 to <8 x i1>
    %24 = extractelement <8 x i1> %23, i64 0
    %25 = select i1 %24, float %22, float %19
@@ -5821,7 +5821,7 @@ define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x d
    %9 = extractelement <2 x double> %x0, i64 0
    %10 = extractelement <2 x double> %x1, i64 0
    %11 = extractelement <2 x double> %x2, i64 0
-  %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 3)
+  %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
    %13 = bitcast i8 %x3 to <8 x i1>
    %14 = extractelement <8 x i1> %13, i64 0
    %15 = select i1 %14, double %12, double 0.000000e+00
@@ -5857,7 +5857,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x flo
    %9 = extractelement <4 x float> %x0, i64 0
    %10 = extractelement <4 x float> %x1, i64 0
    %11 = extractelement <4 x float> %x2, i64 0
-  %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 3)
+  %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
    %13 = bitcast i8 %x3 to <8 x i1>
    %14 = extractelement <8 x i1> %13, i64 0
    %15 = select i1 %14, float %12, float 0.000000e+00
@@ -5913,12 +5913,12 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x d
    %14 = extractelement <2 x double> %x0, i64 0
    %15 = extractelement <2 x double> %x1, i64 0
    %16 = extractelement <2 x double> %x2, i64 0
-  %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 3)
+  %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 11)
    %18 = insertelement <2 x double> %x2, double %17, i64 0
    %19 = extractelement <2 x double> %x0, i64 0
    %20 = extractelement <2 x double> %x1, i64 0
    %21 = extractelement <2 x double> %x2, i64 0
-  %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 3)
+  %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 11)
    %23 = bitcast i8 %x3 to <8 x i1>
    %24 = extractelement <8 x i1> %23, i64 0
    %25 = select i1 %24, double %22, double %21
@@ -5976,12 +5976,12 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x flo
    %14 = extractelement <4 x float> %x0, i64 0
    %15 = extractelement <4 x float> %x1, i64 0
    %16 = extractelement <4 x float> %x2, i64 0
-  %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 3)
+  %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 11)
    %18 = insertelement <4 x float> %x2, float %17, i64 0
    %19 = extractelement <4 x float> %x0, i64 0
    %20 = extractelement <4 x float> %x1, i64 0
    %21 = extractelement <4 x float> %x2, i64 0
-  %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 3)
+  %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 11)
    %23 = bitcast i8 %x3 to <8 x i1>
    %24 = extractelement <8 x i1> %23, i64 0
    %25 = select i1 %24, float %22, float %21
@@ -6220,14 +6220,14 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x d
    %19 = extractelement <2 x double> %x0, i64 0
    %20 = extractelement <2 x double> %x1, i64 0
    %21 = extractelement <2 x double> %18, i64 0
-  %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 3)
+  %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 11)
    %23 = extractelement <2 x double> %x2, i64 0
    %24 = insertelement <2 x double> %x2, double %22, i64 0
    %25 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
    %26 = extractelement <2 x double> %x0, i64 0
    %27 = extractelement <2 x double> %x1, i64 0
    %28 = extractelement <2 x double> %25, i64 0
-  %29 = call double @llvm.x86.avx512.vfmadd.f64(double %26, double %27, double %28, i32 3)
+  %29 = call double @llvm.x86.avx512.vfmadd.f64(double %26, double %27, double %28, i32 11)
    %30 = extractelement <2 x double> %x2, i64 0
    %31 = bitcast i8 %x3 to <8 x i1>
    %32 = extractelement <8 x i1> %31, i64 0
@@ -6291,14 +6291,14 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x flo
    %19 = extractelement <4 x float> %x0, i64 0
    %20 = extractelement <4 x float> %x1, i64 0
    %21 = extractelement <4 x float> %18, i64 0
-  %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 3)
+  %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 11)
    %23 = extractelement <4 x float> %x2, i64 0
    %24 = insertelement <4 x float> %x2, float %22, i64 0
    %25 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
    %26 = extractelement <4 x float> %x0, i64 0
    %27 = extractelement <4 x float> %x1, i64 0
    %28 = extractelement <4 x float> %25, i64 0
-  %29 = call float @llvm.x86.avx512.vfmadd.f32(float %26, float %27, float %28, i32 3)
+  %29 = call float @llvm.x86.avx512.vfmadd.f32(float %26, float %27, float %28, i32 11)
    %30 = extractelement <4 x float> %x2, i64 0
    %31 = bitcast i8 %x3 to <8 x i1>
    %32 = extractelement <8 x i1> %31, i64 0
@@ -6365,7 +6365,7 @@ define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x
    %22 = extractelement <2 x double> %20, i64 0
    %23 = extractelement <2 x double> %x1, i64 0
    %24 = extractelement <2 x double> %21, i64 0
-  %25 = call double @llvm.x86.avx512.vfmadd.f64(double %22, double %23, double %24, i32 3)
+  %25 = call double @llvm.x86.avx512.vfmadd.f64(double %22, double %23, double %24, i32 11)
    %26 = extractelement <2 x double> %x2, i64 0
    %27 = insertelement <2 x double> %x2, double %25, i64 0
    %28 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x0
@@ -6373,7 +6373,7 @@ define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x
    %30 = extractelement <2 x double> %28, i64 0
    %31 = extractelement <2 x double> %x1, i64 0
    %32 = extractelement <2 x double> %29, i64 0
-  %33 = call double @llvm.x86.avx512.vfmadd.f64(double %30, double %31, double %32, i32 3)
+  %33 = call double @llvm.x86.avx512.vfmadd.f64(double %30, double %31, double %32, i32 11)
    %34 = extractelement <2 x double> %x2, i64 0
    %35 = bitcast i8 %x3 to <8 x i1>
    %36 = extractelement <8 x i1> %35, i64 0
@@ -6440,7 +6440,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x fl
    %22 = extractelement <4 x float> %20, i64 0
    %23 = extractelement <4 x float> %x1, i64 0
    %24 = extractelement <4 x float> %21, i64 0
-  %25 = call float @llvm.x86.avx512.vfmadd.f32(float %22, float %23, float %24, i32 3)
+  %25 = call float @llvm.x86.avx512.vfmadd.f32(float %22, float %23, float %24, i32 11)
    %26 = extractelement <4 x float> %x2, i64 0
    %27 = insertelement <4 x float> %x2, float %25, i64 0
    %28 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
@@ -6448,7 +6448,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x fl
    %30 = extractelement <4 x float> %28, i64 0
    %31 = extractelement <4 x float> %x1, i64 0
    %32 = extractelement <4 x float> %29, i64 0
-  %33 = call float @llvm.x86.avx512.vfmadd.f32(float %30, float %31, float %32, i32 3)
+  %33 = call float @llvm.x86.avx512.vfmadd.f32(float %30, float %31, float %32, i32 11)
    %34 = extractelement <4 x float> %x2, i64 0
    %35 = bitcast i8 %x3 to <8 x i1>
    %36 = extractelement <8 x i1> %35, i64 0
diff --git a/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll

index 4ddbfde7624dcd285d46be8ae5d2ea0635a70876..64659d4558604aa441ace5957129e29c1f6ad9f8 100644 (file)
--- a/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
@@ -587,7 +587,7 @@ define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x d
  ; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
  ; X64-NEXT:    retq # encoding: [0xc3]
    %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
-  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 8)
    %res2 = fadd <8 x double> %res, %res1
    ret <8 x double> %res2
  }
@@ -611,7 +611,7 @@ define <8 x float> @test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x f
  ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
  ; X64-NEXT:    retq # encoding: [0xc3]
    %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
-  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 8)
    %res2 = fadd <8 x float> %res, %res1
    ret <8 x float> %res2
  }
@@ -635,7 +635,7 @@ define <8 x double> @test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x
  ; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
  ; X64-NEXT:    retq # encoding: [0xc3]
    %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
-  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 8)
    %res2 = fadd <8 x double> %res, %res1
    ret <8 x double> %res2
  }
@@ -659,7 +659,7 @@ define <8 x float> @test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x
  ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
  ; X64-NEXT:    retq # encoding: [0xc3]
    %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
-  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 8)
    %res2 = fadd <8 x float> %res, %res1
    ret <8 x float> %res2
  }
diff --git a/test/CodeGen/X86/avx512dq-intrinsics.ll b/test/CodeGen/X86/avx512dq-intrinsics.ll

index 3d488795141ca77da84df35ee00135935cb333dc..506db572671ab7f4c932739132d6ba13aceef8a3 100644 (file)
--- a/test/CodeGen/X86/avx512dq-intrinsics.ll
+++ b/test/CodeGen/X86/avx512dq-intrinsics.ll
@@ -134,8 +134,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i
  ; X64-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7b,0xc0]
  ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
-  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
+  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
    %res2 = add <8 x i64> %res, %res1
    ret <8 x i64> %res2
  }
@@ -158,8 +158,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x
  ; X64-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x79,0xc0]
  ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
-  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
+  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
    %res2 = add <8 x i64> %res, %res1
    ret <8 x i64> %res2
  }
@@ -182,8 +182,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i6
  ; X64-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7b,0xc0]
  ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
-  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
+  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
    %res2 = add <8 x i64> %res, %res1
    ret <8 x i64> %res2
  }
@@ -206,8 +206,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i
  ; X64-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x79,0xc0]
  ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
  ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
-  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
+  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
    %res2 = add <8 x i64> %res, %res1
    ret <8 x i64> %res2
  }
@@ -233,7 +233,7 @@ define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x d
    %cvt = sitofp <8 x i64> %x0 to <8 x double>
    %1 = bitcast i8 %x2 to <8 x i1>
    %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
-  %3 = call <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 0)
+  %3 = call <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 8)
    %res2 = fadd <8 x double> %2, %3
    ret <8 x double> %res2
  }
@@ -275,7 +275,7 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x fl
    %cvt = sitofp <8 x i64> %x0 to <8 x float>
    %1 = bitcast i8 %x2 to <8 x i1>
    %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
-  %3 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 0)
+  %3 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 8)
    %res2 = fadd <8 x float> %2, %3
    ret <8 x float> %res2
  }
@@ -397,7 +397,7 @@ define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x
    %cvt = uitofp <8 x i64> %x0 to <8 x double>
    %1 = bitcast i8 %x2 to <8 x i1>
    %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
-  %3 = call <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 0)
+  %3 = call <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 8)
    %res2 = fadd <8 x double> %2, %3
    ret <8 x double> %res2
  }
@@ -439,7 +439,7 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x f
    %cvt = uitofp <8 x i64> %x0 to <8 x float>
    %1 = bitcast i8 %x2 to <8 x i1>
    %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
-  %3 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 0)
+  %3 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 8)
    %res2 = fadd <8 x float> %2, %3
    ret <8 x float> %res2
  }
diff --git a/test/CodeGen/X86/fma-fneg-combine.ll b/test/CodeGen/X86/fma-fneg-combine.ll

index 35965a8b66e9dee55c9a4ae7e3cb95bfe9d308a3..d583c54a086b525395b932da946f28b420b9e1c9 100644 (file)
--- a/test/CodeGen/X86/fma-fneg-combine.ll
+++ b/test/CodeGen/X86/fma-fneg-combine.ll
@@ -61,7 +61,7 @@ define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
  ; CHECK-NEXT:    retq
  entry:
    %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
-  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 2) #2
+  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 10) #2
    ret <16 x float> %0
  }
  
@@ -71,7 +71,7 @@ define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
  ; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
  ; CHECK-NEXT:    retq
  entry:
-  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 2) #2
+  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 10) #2
    %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
    ret <16 x float> %sub.i
  }
@@ -238,7 +238,7 @@ define <16 x float> @test14(<16 x float> %a, <16 x float> %b, <16 x float> %c, i
  ; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
  ; KNL-NEXT:    retq
  entry:
-  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 2) #2
+  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 10) #2
    %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
    ret <16 x float> %sub.i
  }
@@ -266,9 +266,9 @@ define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i
  entry:
    %bc = bitcast i16 %mask to <16 x i1>
    %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
-  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 2)
+  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 10)
    %sel = select <16 x i1> %bc, <16 x float> %0, <16 x float> %sub.i
-  %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 1)
+  %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 9)
    %sel2 = select <16 x i1> %bc, <16 x float> %1, <16 x float> %sel
    ret <16 x float> %sel2
  }
@@ -286,7 +286,7 @@ define <16 x float> @test16(<16 x float> %a, <16 x float> %b, <16 x float> %c, i
  ; KNL-NEXT:    vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
  ; KNL-NEXT:    retq
    %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
-  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 1)
+  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 9)
    %bc = bitcast i16 %mask to <16 x i1>
    %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
    ret <16 x float> %sel
author	Craig Topper <craig.topper@intel.com>
	Sun, 10 Mar 2019 17:20:45 +0000 (17:20 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Sun, 10 Mar 2019 17:20:45 +0000 (17:20 +0000)
lib/Target/X86/MCTargetDesc/X86BaseInfo.h		patch | blob | history
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/avx512-fma-intrinsics-upgrade.ll		patch | blob | history
test/CodeGen/X86/avx512-fma-intrinsics.ll		patch | blob | history
test/CodeGen/X86/avx512-intrinsics-upgrade.ll		patch | blob | history
test/CodeGen/X86/avx512-intrinsics-x86_64.ll		patch | blob | history
test/CodeGen/X86/avx512-intrinsics.ll		patch | blob | history
test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll		patch | blob | history
test/CodeGen/X86/avx512dq-intrinsics.ll		patch | blob | history
test/CodeGen/X86/fma-fneg-combine.ll		patch | blob | history