TO_NEG_INF = 1,
TO_POS_INF = 2,
TO_ZERO = 3,
- CUR_DIRECTION = 4
+ CUR_DIRECTION = 4,
+ NO_EXC = 8
};
/// The constants to describe instruction prefixes, if there are any.
SelectionDAG &DAG) const {
// Helper to detect if the operand is CUR_DIRECTION rounding mode.
auto isRoundModeCurDirection = [](SDValue Rnd) {
- if (!isa<ConstantSDNode>(Rnd))
- return false;
+ if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
+ return C->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION;
- unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
- return Round == X86::STATIC_ROUNDING::CUR_DIRECTION;
+ return false;
+ };
+ auto isRoundModeSAE = [](SDValue Rnd) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
+ return C->getZExtValue() == X86::STATIC_ROUNDING::NO_EXC;
+
+ return false;
+ };
+ auto isRoundModeSAEToX = [](SDValue Rnd) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
+ unsigned Round = C->getZExtValue();
+ if (Round & X86::STATIC_ROUNDING::NO_EXC) {
+ // Clear the NO_EXC bit and check remaining bits.
+ Round ^= X86::STATIC_ROUNDING::NO_EXC;
+ return Round == X86::STATIC_ROUNDING::TO_NEAREST_INT ||
+ Round == X86::STATIC_ROUNDING::TO_NEG_INF ||
+ Round == X86::STATIC_ROUNDING::TO_POS_INF ||
+ Round == X86::STATIC_ROUNDING::TO_ZERO;
+ }
+ }
+
+ return false;
};
SDLoc dl(Op);
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(2);
- if (!isRoundModeCurDirection(Rnd)) {
+ if (isRoundModeSAEToX(Rnd))
return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
Op.getOperand(1), Rnd);
- }
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
}
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(3);
- if (!isRoundModeCurDirection(Rnd)) {
+ if (isRoundModeSAEToX(Rnd))
return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
Op.getOperand(1), Src2, Rnd);
- }
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
- if (!isRoundModeCurDirection(Rnd)) {
+ if (isRoundModeSAEToX(Rnd))
return DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd);
- }
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
- if (!isRoundModeCurDirection(Rnd)) {
+ if (isRoundModeSAEToX(Rnd))
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src, Rnd),
Mask, PassThru, Subtarget, DAG);
- }
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
Mask, PassThru, Subtarget, DAG);
if (Op.getNumOperands() == (5U + HasRounding)) {
if (HasRounding) {
SDValue Rnd = Op.getOperand(5);
- if (!isRoundModeCurDirection(Rnd))
+ if (isRoundModeSAEToX(Rnd))
return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, VT, Src1, Src2, Rnd),
Mask, passThru, Subtarget, DAG);
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
Src2),
SDValue RoundingMode = Op.getOperand(5);
if (HasRounding) {
SDValue Sae = Op.getOperand(6);
- if (!isRoundModeCurDirection(Sae))
+ if (isRoundModeSAE(Sae))
return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, VT, Src1, Src2,
RoundingMode, Sae),
Mask, passThru, Subtarget, DAG);
+ if (!isRoundModeCurDirection(Sae))
+ return SDValue();
}
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
Src2, RoundingMode),
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
- if (!isRoundModeCurDirection(Rnd)) {
+ if (isRoundModeSAEToX(Rnd))
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
- }
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
// TODO: Intrinsics should have fast-math-flags to propagate.
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2),
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(6);
- if (!isRoundModeCurDirection(Rnd))
+ if (isRoundModeSAEToX(Rnd))
return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, VT, Src1, Src2, Src3, Rnd),
Mask, PassThru, Subtarget, DAG);
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
Src2, Src3),
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(6);
- if (!isRoundModeCurDirection(Rnd)) {
+ if (isRoundModeSAEToX(Rnd))
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
Mask, PassThru, Subtarget, DAG);
- }
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3),
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
if (IntrData->Opc1 != 0) {
- SDValue Rnd = Op.getOperand(4);
- if (!isRoundModeCurDirection(Rnd))
- Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2), CC, Rnd);
+ SDValue Sae = Op.getOperand(4);
+ if (isRoundModeSAE(Sae))
+ return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), CC, Sae);
+ if (!isRoundModeCurDirection(Sae))
+ return SDValue();
}
//default rounding mode
- if (!Cmp.getNode())
- Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+ return DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC);
-
- return Cmp;
}
case CMP_MASK_SCALAR_CC: {
SDValue Src1 = Op.getOperand(1);
SDValue Cmp;
if (IntrData->Opc1 != 0) {
- SDValue Rnd = Op.getOperand(5);
- if (!isRoundModeCurDirection(Rnd))
- Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd);
+ SDValue Sae = Op.getOperand(5);
+ if (isRoundModeSAE(Sae))
+ Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae);
+ else if (!isRoundModeCurDirection(Sae))
+ return SDValue();
}
//default rounding mode
if (!Cmp.getNode())
if (isRoundModeCurDirection(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8));
- else
+ else if (isRoundModeSAE(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+ else
+ return SDValue();
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 8) nounwind
ret <16 x float> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 9) nounwind
ret <16 x float> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 10) nounwind
ret <16 x float> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 11) nounwind
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 8) nounwind
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 9) nounwind
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 10) nounwind
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 11) nounwind
ret <16 x float> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 8) nounwind
ret <8 x double> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 9) nounwind
ret <8 x double> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 10) nounwind
ret <8 x double> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 11) nounwind
ret <8 x double> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 8) nounwind
ret <8 x double> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 9) nounwind
ret <8 x double> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 10) nounwind
ret <8 x double> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 11) nounwind
ret <8 x double> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 8) nounwind
ret <8 x double> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 9) nounwind
ret <8 x double> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 10) nounwind
ret <8 x double> %res
}
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 11) nounwind
ret <8 x double> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 8) nounwind
ret <8 x double> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 9) nounwind
ret <8 x double> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 10) nounwind
ret <8 x double> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 11) nounwind
ret <8 x double> %res
}
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 0) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
%bc = bitcast i16 %mask to <16 x i1>
%sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
ret <16 x float> %sel
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x39,0x98,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 1) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
%bc = bitcast i16 %mask to <16 x i1>
%sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
ret <16 x float> %sel
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x59,0x98,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 2) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
%bc = bitcast i16 %mask to <16 x i1>
%sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
ret <16 x float> %sel
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x79,0x98,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 3) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
%bc = bitcast i16 %mask to <16 x i1>
%sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
ret <16 x float> %sel
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 0) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
ret <16 x float> %res
}
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 1) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
ret <16 x float> %res
}
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 2) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
ret <16 x float> %res
}
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 3) nounwind
+ %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
ret <16 x float> %res
}
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
%bc = bitcast i8 %mask to <8 x i1>
%sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
ret <8 x double> %sel
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x98,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
%bc = bitcast i8 %mask to <8 x i1>
%sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
ret <8 x double> %sel
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x98,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
%bc = bitcast i8 %mask to <8 x i1>
%sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
ret <8 x double> %sel
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x98,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
%bc = bitcast i8 %mask to <8 x i1>
%sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
ret <8 x double> %sel
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 0) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
ret <8 x double> %res
}
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 1) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
ret <8 x double> %res
}
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 2) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
ret <8 x double> %res
}
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 3) nounwind
+ %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
ret <8 x double> %res
}
; X64-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
- %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 0)
+ %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
%4 = bitcast i8 %mask to <8 x i1>
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
ret <8 x double> %5
; X64-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
- %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 1)
+ %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
%4 = bitcast i8 %mask to <8 x i1>
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
ret <8 x double> %5
; X64-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
- %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 2)
+ %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
%4 = bitcast i8 %mask to <8 x i1>
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
ret <8 x double> %5
; X64-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
- %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 3)
+ %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
%4 = bitcast i8 %mask to <8 x i1>
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
ret <8 x double> %5
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
- %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 0)
+ %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
ret <8 x double> %3
}
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
- %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 1)
+ %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
ret <8 x double> %3
}
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
- %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 2)
+ %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
ret <8 x double> %3
}
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
- %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 3)
+ %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
ret <8 x double> %3
}
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 -1, i32 0)
+ <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 -1, i32 1)
+ <16 x float> zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 -1, i32 2)
+ <16 x float> zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 -1, i32 3)
+ <16 x float> zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 -1, i32 0)
+ <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 -1, i32 1)
+ <16 x float> zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 -1, i32 2)
+ <16 x float> zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 -1, i32 3)
+ <16 x float> zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}
; X64-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 %mask, i32 0)
+ <16 x float> zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}
; X64-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 %mask, i32 1)
+ <16 x float> zeroinitializer, i16 %mask, i32 9)
ret <16 x float> %res
}
; X64-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 %mask, i32 2)
+ <16 x float> zeroinitializer, i16 %mask, i32 10)
ret <16 x float> %res
}
; X64-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> zeroinitializer, i16 %mask, i32 3)
+ <16 x float> zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> %passthru, i16 %mask, i32 0)
+ <16 x float> %passthru, i16 %mask, i32 8)
ret <16 x float> %res
}
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> %passthru, i16 %mask, i32 1)
+ <16 x float> %passthru, i16 %mask, i32 9)
ret <16 x float> %res
}
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> %passthru, i16 %mask, i32 2)
+ <16 x float> %passthru, i16 %mask, i32 10)
ret <16 x float> %res
}
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float> %passthru, i16 %mask, i32 3)
+ <16 x float> %passthru, i16 %mask, i32 11)
ret <16 x float> %res
}
; X64-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
- <8 x double> zeroinitializer, i8 %mask, i32 0)
+ <8 x double> zeroinitializer, i8 %mask, i32 8)
ret <8 x double> %res
}
; X64-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
- <8 x double> zeroinitializer, i8 %mask, i32 1)
+ <8 x double> zeroinitializer, i8 %mask, i32 9)
ret <8 x double> %res
}
; X64-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
- <8 x double> zeroinitializer, i8 %mask, i32 2)
+ <8 x double> zeroinitializer, i8 %mask, i32 10)
ret <8 x double> %res
}
; X64-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
- <8 x double> zeroinitializer, i8 %mask, i32 3)
+ <8 x double> zeroinitializer, i8 %mask, i32 11)
ret <8 x double> %res
}
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10)
ret <16 x float> %res
}
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
ret <16 x float> %res
}
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}
; X64-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X64-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X64-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
ret <16 x float> %res
}
; X64-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10)
ret <16 x float> %res
}
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
ret <16 x float> %res
}
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
- %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
- %res3 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
%res4 = fadd <2 x double> %res, %res1
%res5 = fadd <2 x double> %res2, %res3
%res6 = fadd <2 x double> %res4, %res5
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
- %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
- %res3 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
%res4 = fadd <4 x float> %res, %res1
%res5 = fadd <4 x float> %res2, %res3
%res6 = fadd <4 x float> %res4, %res5
; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+ %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+ %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res
}
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
- %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
- %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
%res4 = fadd <2 x double> %res, %res1
%res5 = fadd <2 x double> %res2, %res3
%res6 = fadd <2 x double> %res4, %res5
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
- %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
- %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
%res4 = fadd <4 x float> %res, %res1
%res5 = fadd <4 x float> %res2, %res3
%res6 = fadd <4 x float> %res4, %res5
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
- %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
- %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
%res4 = fadd <2 x double> %res, %res1
%res5 = fadd <2 x double> %res2, %res3
%res6 = fadd <2 x double> %res4, %res5
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
- %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
- %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
%res4 = fadd <4 x float> %res, %res1
%res5 = fadd <4 x float> %res2, %res3
%res6 = fadd <4 x float> %res4, %res5
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
- %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
- %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
%res4 = fadd <2 x double> %res, %res1
%res5 = fadd <2 x double> %res2, %res3
%res6 = fadd <2 x double> %res4, %res5
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
- %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
- %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
%res4 = fadd <4 x float> %res, %res1
%res5 = fadd <4 x float> %res2, %res3
%res6 = fadd <4 x float> %res4, %res5
; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4)
- %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 3)
- %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 1)
+ %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 11)
+ %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 9)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4)
- %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 3)
- %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 1)
+ %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 11)
+ %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 9)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4)
- %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 3)
- %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 1)
+ %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 11)
+ %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 9)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4)
- %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 3)
- %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 1)
+ %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 11)
+ %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 9)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
; CHECK-NEXT: retq
- %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 11) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
; CHECK-NEXT: retq
- %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 11) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT: retq
- %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 9) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT: retq
- %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 9) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
}
; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0
; X86-NEXT: retl
%res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
- %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
- %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 10)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 11)
%res.1 = fadd <4 x float> %res0, %res1
%res.2 = fadd <4 x float> %res2, %res3
; X86-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; X86-NEXT: retl
%res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
- %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
- %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 10)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 11)
%res.1 = fadd <2 x double> %res0, %res1
%res.2 = fadd <2 x double> %res2, %res3
; CHECK-NEXT: ret{{[l|q]}}
%res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
- %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 3)
- %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 1)
+ %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 11)
+ %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 9)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
; CHECK-NEXT: ret{{[l|q]}}
%res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
- %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 3)
- %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 1)
+ %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 11)
+ %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 9)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
; CHECK-NEXT: ret{{[l|q]}}
%res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
- %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 3)
- %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 1)
+ %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 11)
+ %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 9)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
; CHECK-NEXT: ret{{[l|q]}}
%res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
- %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 3)
- %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 1)
+ %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 11)
+ %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 9)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
; CHECK: # %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
ret <16 x float> %1
}
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0)
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 8)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 1)
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 9)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 2)
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 10)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 3)
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 11)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; CHECK: # %bb.0:
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
ret <16 x float> %1
}
; X86-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; X86-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; X86-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; X86-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; CHECK: # %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
ret <16 x float> %1
}
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
; CHECK: # %bb.0:
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
ret <16 x float> %1
}
; CHECK: # %bb.0:
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
ret <16 x float> %1
}
; X86-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovaps %xmm2, %xmm0
; X86-NEXT: retl
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
ret <4 x float> %res
}
; X86-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovaps %xmm2, %xmm0
; X86-NEXT: retl
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
ret <4 x float> %res
}
; X86-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovaps %xmm2, %xmm0
; X86-NEXT: retl
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 10)
ret <4 x float> %res
}
; X86-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovaps %xmm2, %xmm0
; X86-NEXT: retl
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 11)
ret <4 x float> %res
}
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
ret <4 x float> %res
}
; CHECK: # %bb.0:
; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
ret <4 x float> %res
}
; X86-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
; X86-NEXT: retl
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
ret <2 x double> %res
}
; X86-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
; X86-NEXT: retl
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
ret <2 x double> %res
}
; X86-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
; X86-NEXT: retl
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 10)
ret <2 x double> %res
}
; X86-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
; X86-NEXT: retl
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 11)
ret <2 x double> %res
}
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
ret <2 x double> %res
}
; CHECK: # %bb.0:
; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
ret <2 x double> %res
}
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2ssl %eax, {rz-sae}, %xmm0, %xmm0
; X86-NEXT: retl
- %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 11) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
; X86-NEXT: retl
- %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
}
; X86-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
; X86-NEXT: retl
%b = load i32, i32* %ptr
- %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
}
; X86-NEXT: vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0
; X86-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; X86-NEXT: retl
- %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+ %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 11)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 8)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}
; X86-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0
; X86-NEXT: vaddps %zmm0, %zmm2, %zmm0
; X86-NEXT: retl
- %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+ %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 10)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 8)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
%cvt = sitofp <16 x i32> %x0 to <16 x float>
%1 = bitcast i16 %x2 to <16 x i1>
%2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
- %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 0)
+ %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
%res2 = fadd <16 x float> %2, %3
ret <16 x float> %res2
}
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; X86-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0
; X86-NEXT: retl
%res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
- %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 10)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
; X86-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; X86-NEXT: retl
- %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 10)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
; X86-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; X86-NEXT: retl
- %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
; X86-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; X86-NEXT: retl
- %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
%cvt = uitofp <16 x i32> %x0 to <16 x float>
%1 = bitcast i16 %x2 to <16 x i1>
%2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
- %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 0)
+ %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
%res2 = fadd <16 x float> %2, %3
ret <16 x float> %res2
}
; X86-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0
; X86-NEXT: retl
- %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 11)
%res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
%14 = extractelement <2 x double> %x0, i64 0
%15 = extractelement <2 x double> %x1, i64 0
%16 = extractelement <2 x double> %x2, i64 0
- %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 3)
+ %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 11)
%18 = insertelement <2 x double> %x0, double %17, i64 0
%19 = extractelement <2 x double> %x0, i64 0
%20 = extractelement <2 x double> %x1, i64 0
%21 = extractelement <2 x double> %x2, i64 0
- %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 3)
+ %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 11)
%23 = bitcast i8 %x3 to <8 x i1>
%24 = extractelement <8 x i1> %23, i64 0
%25 = select i1 %24, double %22, double %19
%14 = extractelement <4 x float> %x0, i64 0
%15 = extractelement <4 x float> %x1, i64 0
%16 = extractelement <4 x float> %x2, i64 0
- %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 3)
+ %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 11)
%18 = insertelement <4 x float> %x0, float %17, i64 0
%19 = extractelement <4 x float> %x0, i64 0
%20 = extractelement <4 x float> %x1, i64 0
%21 = extractelement <4 x float> %x2, i64 0
- %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 3)
+ %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 11)
%23 = bitcast i8 %x3 to <8 x i1>
%24 = extractelement <8 x i1> %23, i64 0
%25 = select i1 %24, float %22, float %19
%9 = extractelement <2 x double> %x0, i64 0
%10 = extractelement <2 x double> %x1, i64 0
%11 = extractelement <2 x double> %x2, i64 0
- %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 3)
+ %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
%13 = bitcast i8 %x3 to <8 x i1>
%14 = extractelement <8 x i1> %13, i64 0
%15 = select i1 %14, double %12, double 0.000000e+00
%9 = extractelement <4 x float> %x0, i64 0
%10 = extractelement <4 x float> %x1, i64 0
%11 = extractelement <4 x float> %x2, i64 0
- %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 3)
+ %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
%13 = bitcast i8 %x3 to <8 x i1>
%14 = extractelement <8 x i1> %13, i64 0
%15 = select i1 %14, float %12, float 0.000000e+00
%14 = extractelement <2 x double> %x0, i64 0
%15 = extractelement <2 x double> %x1, i64 0
%16 = extractelement <2 x double> %x2, i64 0
- %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 3)
+ %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 11)
%18 = insertelement <2 x double> %x2, double %17, i64 0
%19 = extractelement <2 x double> %x0, i64 0
%20 = extractelement <2 x double> %x1, i64 0
%21 = extractelement <2 x double> %x2, i64 0
- %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 3)
+ %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 11)
%23 = bitcast i8 %x3 to <8 x i1>
%24 = extractelement <8 x i1> %23, i64 0
%25 = select i1 %24, double %22, double %21
%14 = extractelement <4 x float> %x0, i64 0
%15 = extractelement <4 x float> %x1, i64 0
%16 = extractelement <4 x float> %x2, i64 0
- %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 3)
+ %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 11)
%18 = insertelement <4 x float> %x2, float %17, i64 0
%19 = extractelement <4 x float> %x0, i64 0
%20 = extractelement <4 x float> %x1, i64 0
%21 = extractelement <4 x float> %x2, i64 0
- %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 3)
+ %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 11)
%23 = bitcast i8 %x3 to <8 x i1>
%24 = extractelement <8 x i1> %23, i64 0
%25 = select i1 %24, float %22, float %21
%19 = extractelement <2 x double> %x0, i64 0
%20 = extractelement <2 x double> %x1, i64 0
%21 = extractelement <2 x double> %18, i64 0
- %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 3)
+ %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 11)
%23 = extractelement <2 x double> %x2, i64 0
%24 = insertelement <2 x double> %x2, double %22, i64 0
%25 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
%26 = extractelement <2 x double> %x0, i64 0
%27 = extractelement <2 x double> %x1, i64 0
%28 = extractelement <2 x double> %25, i64 0
- %29 = call double @llvm.x86.avx512.vfmadd.f64(double %26, double %27, double %28, i32 3)
+ %29 = call double @llvm.x86.avx512.vfmadd.f64(double %26, double %27, double %28, i32 11)
%30 = extractelement <2 x double> %x2, i64 0
%31 = bitcast i8 %x3 to <8 x i1>
%32 = extractelement <8 x i1> %31, i64 0
%19 = extractelement <4 x float> %x0, i64 0
%20 = extractelement <4 x float> %x1, i64 0
%21 = extractelement <4 x float> %18, i64 0
- %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 3)
+ %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 11)
%23 = extractelement <4 x float> %x2, i64 0
%24 = insertelement <4 x float> %x2, float %22, i64 0
%25 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
%26 = extractelement <4 x float> %x0, i64 0
%27 = extractelement <4 x float> %x1, i64 0
%28 = extractelement <4 x float> %25, i64 0
- %29 = call float @llvm.x86.avx512.vfmadd.f32(float %26, float %27, float %28, i32 3)
+ %29 = call float @llvm.x86.avx512.vfmadd.f32(float %26, float %27, float %28, i32 11)
%30 = extractelement <4 x float> %x2, i64 0
%31 = bitcast i8 %x3 to <8 x i1>
%32 = extractelement <8 x i1> %31, i64 0
%22 = extractelement <2 x double> %20, i64 0
%23 = extractelement <2 x double> %x1, i64 0
%24 = extractelement <2 x double> %21, i64 0
- %25 = call double @llvm.x86.avx512.vfmadd.f64(double %22, double %23, double %24, i32 3)
+ %25 = call double @llvm.x86.avx512.vfmadd.f64(double %22, double %23, double %24, i32 11)
%26 = extractelement <2 x double> %x2, i64 0
%27 = insertelement <2 x double> %x2, double %25, i64 0
%28 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x0
%30 = extractelement <2 x double> %28, i64 0
%31 = extractelement <2 x double> %x1, i64 0
%32 = extractelement <2 x double> %29, i64 0
- %33 = call double @llvm.x86.avx512.vfmadd.f64(double %30, double %31, double %32, i32 3)
+ %33 = call double @llvm.x86.avx512.vfmadd.f64(double %30, double %31, double %32, i32 11)
%34 = extractelement <2 x double> %x2, i64 0
%35 = bitcast i8 %x3 to <8 x i1>
%36 = extractelement <8 x i1> %35, i64 0
%22 = extractelement <4 x float> %20, i64 0
%23 = extractelement <4 x float> %x1, i64 0
%24 = extractelement <4 x float> %21, i64 0
- %25 = call float @llvm.x86.avx512.vfmadd.f32(float %22, float %23, float %24, i32 3)
+ %25 = call float @llvm.x86.avx512.vfmadd.f32(float %22, float %23, float %24, i32 11)
%26 = extractelement <4 x float> %x2, i64 0
%27 = insertelement <4 x float> %x2, float %25, i64 0
%28 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
%30 = extractelement <4 x float> %28, i64 0
%31 = extractelement <4 x float> %x1, i64 0
%32 = extractelement <4 x float> %29, i64 0
- %33 = call float @llvm.x86.avx512.vfmadd.f32(float %30, float %31, float %32, i32 3)
+ %33 = call float @llvm.x86.avx512.vfmadd.f32(float %30, float %31, float %32, i32 11)
%34 = extractelement <4 x float> %x2, i64 0
%35 = bitcast i8 %x3 to <8 x i1>
%36 = extractelement <8 x i1> %35, i64 0
; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 8)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}
; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
- %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 8)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 8)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}
; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
- %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 8)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
; X64-NEXT: vcvtpd2qq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7b,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
; X64-NEXT: vcvtpd2uqq {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x79,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
; X64-NEXT: vcvtps2qq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7b,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
; X64-NEXT: vcvtps2uqq {rn-sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x79,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
%cvt = sitofp <8 x i64> %x0 to <8 x double>
%1 = bitcast i8 %x2 to <8 x i1>
%2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
- %3 = call <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 0)
+ %3 = call <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 8)
%res2 = fadd <8 x double> %2, %3
ret <8 x double> %res2
}
%cvt = sitofp <8 x i64> %x0 to <8 x float>
%1 = bitcast i8 %x2 to <8 x i1>
%2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
- %3 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 0)
+ %3 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 8)
%res2 = fadd <8 x float> %2, %3
ret <8 x float> %res2
}
%cvt = uitofp <8 x i64> %x0 to <8 x double>
%1 = bitcast i8 %x2 to <8 x i1>
%2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
- %3 = call <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 0)
+ %3 = call <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 8)
%res2 = fadd <8 x double> %2, %3
ret <8 x double> %res2
}
%cvt = uitofp <8 x i64> %x0 to <8 x float>
%1 = bitcast i8 %x2 to <8 x i1>
%2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
- %3 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 0)
+ %3 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 8)
%res2 = fadd <8 x float> %2, %3
ret <8 x float> %res2
}
; CHECK-NEXT: retq
entry:
%sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
- %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 2) #2
+ %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 10) #2
ret <16 x float> %0
}
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
- %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 2) #2
+ %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 10) #2
%sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
ret <16 x float> %sub.i
}
; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT: retq
entry:
- %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 2) #2
+ %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 10) #2
%sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
ret <16 x float> %sub.i
}
entry:
%bc = bitcast i16 %mask to <16 x i1>
%sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
- %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 2)
+ %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 10)
%sel = select <16 x i1> %bc, <16 x float> %0, <16 x float> %sub.i
- %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 1)
+ %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 9)
%sel2 = select <16 x i1> %bc, <16 x float> %1, <16 x float> %sel
ret <16 x float> %sel2
}
; KNL-NEXT: vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: retq
%sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
- %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 1)
+ %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 9)
%bc = bitcast i16 %mask to <16 x i1>
%sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
ret <16 x float> %sel