else
PassThru = Src1;
- SDValue Rnd = Op.getOperand(5);
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (!isRoundModeCurDirection(Rnd))
+ return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl,
+ Op.getValueType(), Src1, Src2,
+ Src3, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl,
Op.getValueType(), Src1, Src2,
- Src3, Rnd),
+ Src3),
Mask, PassThru, Subtarget, DAG);
}
case IFMA_OP_MASKZ:
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
+ case X86ISD::FMADDS1: return "X86ISD::FMADDS1";
+ case X86ISD::FNMADDS1: return "X86ISD::FNMADDS1";
+ case X86ISD::FMSUBS1: return "X86ISD::FMSUBS1";
+ case X86ISD::FNMSUBS1: return "X86ISD::FNMSUBS1";
case X86ISD::FMADDS1_RND: return "X86ISD::FMADDS1_RND";
case X86ISD::FNMADDS1_RND: return "X86ISD::FNMADDS1_RND";
case X86ISD::FMSUBS1_RND: return "X86ISD::FMSUBS1_RND";
case X86ISD::FNMSUBS1_RND: return "X86ISD::FNMSUBS1_RND";
+ case X86ISD::FMADDS3: return "X86ISD::FMADDS3";
+ case X86ISD::FNMADDS3: return "X86ISD::FNMADDS3";
+ case X86ISD::FMSUBS3: return "X86ISD::FMSUBS3";
+ case X86ISD::FNMSUBS3: return "X86ISD::FNMSUBS3";
case X86ISD::FMADDS3_RND: return "X86ISD::FMADDS3_RND";
case X86ISD::FNMADDS3_RND: return "X86ISD::FNMADDS3_RND";
case X86ISD::FMSUBS3_RND: return "X86ISD::FMSUBS3_RND";
// Do not convert the passthru input of scalar intrinsics.
// FIXME: We could allow negations of the lower element only.
- bool NegA = N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A);
+ bool NegA = N->getOpcode() != X86ISD::FMADDS1 &&
+ N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A);
bool NegB = invertIfNegative(B);
- bool NegC = N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C);
+ bool NegC = N->getOpcode() != X86ISD::FMADDS3 &&
+ N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C);
// Negative multiplication when NegA xor NegB
bool NegMul = (NegA != NegB);
case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD_RND; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB_RND; break;
}
+ } else if (N->getOpcode() == X86ISD::FMADDS1) {
+ switch (NewOpcode) {
+ case ISD::FMA: NewOpcode = X86ISD::FMADDS1; break;
+ case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1; break;
+ case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1; break;
+ case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1; break;
+ }
+ } else if (N->getOpcode() == X86ISD::FMADDS3) {
+ switch (NewOpcode) {
+ case ISD::FMA: NewOpcode = X86ISD::FMADDS3; break;
+ case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3; break;
+ case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3; break;
+ case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3; break;
+ }
} else if (N->getOpcode() == X86ISD::FMADDS1_RND) {
switch (NewOpcode) {
case ISD::FMA: NewOpcode = X86ISD::FMADDS1_RND; break;
case X86ISD::FMADD_RND:
case X86ISD::FMADDS1_RND:
case X86ISD::FMADDS3_RND:
+ case X86ISD::FMADDS1:
+ case X86ISD::FMADDS3:
case ISD::FMA: return combineFMA(N, DAG, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG);
FMADDSUB_RND,
FMSUBADD_RND,
+ // Scalar intrinsic FMA.
+ FMADDS1, FMADDS3,
+ FNMADDS1, FNMADDS3,
+ FMSUBS1, FMSUBS3,
+ FNMSUBS1, FNMSUBS3,
+
// Scalar intrinsic FMA with rounding mode.
// Two versions, passthru bits on op1 or op3.
FMADDS1_RND, FMADDS3_RND,
}
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
- string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
- SDNode OpNodeRnds3, X86VectorVTInfo _ , string SUFF> {
+ string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
+ SDNode OpNodeRnds1, SDNode OpNodes3,
+ SDNode OpNodeRnds3, X86VectorVTInfo _,
+ string SUFF> {
let ExeDomain = _.ExeDomain in {
defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
// Operands for intrinsic are in 123 order to preserve passthu
// semantics.
- (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
- (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2,
- _.ScalarIntMemCPat:$src3, (i32 FROUND_CURRENT))),
+ (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
+ (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
+ _.ScalarIntMemCPat:$src3)),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
(_.ScalarLdFrag addr:$src3)))), 0>;
defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
- (_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
- (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnds3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
- _.RC:$src1, (i32 FROUND_CURRENT))),
+ (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
+ (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
+ _.RC:$src1)),
(_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
(null_frag),
- (_.VT (OpNodeRnds1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
- _.RC:$src2, (i32 FROUND_CURRENT))),
+ (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
+ _.RC:$src2)),
(null_frag),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
_.FRC:$src2))),
}
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
- string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
+ string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
+ SDNode OpNodeRnds1, SDNode OpNodes3,
SDNode OpNodeRnds3> {
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
- OpNodeRnds1, OpNodeRnds3, f32x_info, "SS">,
+ OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
+ f32x_info, "SS">,
EVEX_CD8<32, CD8VT1>, VEX_LIG;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
- OpNodeRnds1, OpNodeRnds3, f64x_info, "SD">,
+ OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
+ f64x_info, "SD">,
EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
}
-defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnds1,
- X86FmaddRnds3>;
-defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnds1,
- X86FmsubRnds3>;
-defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd,
- X86FnmaddRnds1, X86FnmaddRnds3>;
-defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub,
- X86FnmsubRnds1, X86FnmsubRnds3>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
+ X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
+defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
+ X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
+defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
+ X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
+defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
+ X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
+// Scalar FMA intrinsics with passthru bits in operand 1.
+def X86Fmadds1 : SDNode<"X86ISD::FMADDS1", SDTFPTernaryOp>;
+def X86Fnmadds1 : SDNode<"X86ISD::FNMADDS1", SDTFPTernaryOp>;
+def X86Fmsubs1 : SDNode<"X86ISD::FMSUBS1", SDTFPTernaryOp>;
+def X86Fnmsubs1 : SDNode<"X86ISD::FNMSUBS1", SDTFPTernaryOp>;
+
// Scalar FMA intrinsics with passthru bits in operand 1.
def X86FmaddRnds1 : SDNode<"X86ISD::FMADDS1_RND", SDTFmaRound>;
def X86FnmaddRnds1 : SDNode<"X86ISD::FNMADDS1_RND", SDTFmaRound>;
def X86FmsubRnds1 : SDNode<"X86ISD::FMSUBS1_RND", SDTFmaRound>;
def X86FnmsubRnds1 : SDNode<"X86ISD::FNMSUBS1_RND", SDTFmaRound>;
+def X86Fmadds3 : SDNode<"X86ISD::FMADDS3", SDTFPTernaryOp>;
+def X86Fnmadds3 : SDNode<"X86ISD::FNMADDS3", SDTFPTernaryOp>;
+def X86Fmsubs3 : SDNode<"X86ISD::FMSUBS3", SDTFPTernaryOp>;
+def X86Fnmsubs3 : SDNode<"X86ISD::FNMSUBS3", SDTFPTernaryOp>;
+
// Scalar FMA intrinsics with passthru bits in operand 3.
def X86FmaddRnds3 : SDNode<"X86ISD::FMADDS3_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FnmaddRnds3 : SDNode<"X86ISD::FNMADDS3_RND", SDTFmaRound, [SDNPCommutative]>;
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, ISD::FMA,
X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, ISD::FMA,
X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
X86ISD::FMSUB_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
X86ISD::FNMSUB_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, ISD::FMA,
X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1_RND, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1_RND, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,