// first.
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case CVTPD2PS:
- // ISD::FP_ROUND has a second argument that indicates if the truncation
- // does not change the value. Set it to 0 since it can change.
- return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
- DAG.getIntPtrConstant(0, dl));
- case CVTPD2PS_RND_MASK: {
- SDValue Src = Op.getOperand(1);
- SDValue PassThru = Op.getOperand(2);
- SDValue Mask = Op.getOperand(3);
- // We add rounding mode to the Node when
- // - RM Opcode is specified and
- // - RM is not "current direction".
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- SDValue Rnd = Op.getOperand(4);
- if (!isRoundModeCurDirection(Rnd)) {
- return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(),
- Src, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
- }
- assert(IntrData->Opc0 == ISD::FP_ROUND && "Unexpected opcode!");
- // ISD::FP_ROUND has a second argument that indicates if the truncation
- // does not change the value. Set it to 0 since it can change.
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
- DAG.getIntPtrConstant(0, dl)),
- Mask, PassThru, Subtarget, DAG);
- }
case FPCLASSS: {
SDValue Src1 = Op.getOperand(1);
SDValue Imm = Op.getOperand(2);
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
PS, EVEX_CD8<32, CD8VH>;
-def : Pat<(v8f64 (extloadv8f32 addr:$src)),
- (VCVTPS2PDZrm addr:$src)>;
+let Predicates = [HasAVX512] in {
+ // The 512-bit VCVTPD2PS definition in this file is instantiated with
+ // X86vfpround, so the generic fpround node is mapped onto the instruction
+ // by the explicit patterns below.
+ //
+ // Register source. A vselect whose false operand is $src0 selects the "k"
+ // instruction form; a vselect against all-zeros selects the "kz" form.
+ def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
+ (VCVTPD2PSZrr VR512:$src)>;
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+ VR256X:$src0),
+ (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+ v8f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
+
+ // Source is a v8f64 load (loadv8f64): rm / rmk / rmkz forms.
+ def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
+ (VCVTPD2PSZrm addr:$src)>;
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+ VR256X:$src0),
+ (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+ v8f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
+
+ // Source is an X86VBroadcast of a scalar f64 load: rmb / rmbk / rmbkz forms.
+ def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2PSZrmb addr:$src)>;
+ def : Pat<(vselect VK8WM:$mask,
+ (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
+ (v8f32 VR256X:$src0)),
+ (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(vselect VK8WM:$mask,
+ (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
+ v8f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
+
+ // A v8f32 extending load producing v8f64 selects the memory form of
+ // VCVTPS2PD.
+ def : Pat<(v8f64 (extloadv8f32 addr:$src)),
+ (VCVTPS2PDZrm addr:$src)>;
+}
let Predicates = [HasVLX] in {
+ // Generic fpround v4f64 -> v4f32 onto VCVTPD2PSZ256. As with the 512-bit
+ // patterns, a vselect against $src0 selects the "k" form and a vselect
+ // against all-zeros selects the "kz" form.
+ //
+ // Register source.
+ def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
+ (VCVTPD2PSZ256rr VR256X:$src)>;
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+ VR128X:$src0),
+ (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+ v4f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ // Source is a v4f64 load (loadv4f64): rm / rmk / rmkz forms.
+ def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+ (VCVTPD2PSZ256rm addr:$src)>;
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+ VR128X:$src0),
+ (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+ v4f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ // Source is an X86VBroadcast of a scalar f64 load: rmb / rmbk / rmbkz forms.
+ def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2PSZ256rmb addr:$src)>;
+ def : Pat<(vselect VK4WM:$mask,
+ (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+ VR128X:$src0),
+ (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(vselect VK4WM:$mask,
+ (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+ v4f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
+
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
(VCVTPD2PSZ128rr VR128X:$src)>;
(VCVTUDQ2PDZ128rm addr:$src)>;
}
-let Predicates = [HasAVX512] in {
- def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
- (VCVTPD2PSZrm addr:$src)>;
- def : Pat<(v8f64 (extloadv8f32 addr:$src)),
- (VCVTPS2PDZrm addr:$src)>;
-}
-
let Predicates = [HasDQI, HasVLX] in {
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
+// SelectionDAG node for X86ISD::VFPROUND: one result with f32 vector elements
+// and one operand with f64 vector elements.
+// NOTE(review): the same-size constraint between result and operand is
+// replaced by SDTCisOpSmallerThanOp, presumably so the node can also carry
+// the v4f64 -> v4f32 and v8f64 -> v8f32 conversions that now use it in the
+// instruction definitions -- confirm against TargetSelectionDAG.td.
def X86vfpround: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
- SDTCisSameSizeAs<0, 1>]>>;
+ SDTCisOpSmallerThanOp<0, 1>]>>;
def X86froundRnd: SDNode<"X86ISD::VFPROUNDS_RND",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
let Predicates = [HasAVX, NoVLX] in {
+// The 256-bit source forms below match the target node X86vfpround in their
+// instruction patterns; the generic fpround node is selected through
+// separate Pat<> entries rather than through these definitions.
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (fpround VR256:$src))]>,
+ [(set VR128:$dst, (X86vfpround VR256:$src))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (fpround (loadv4f64 addr:$src)))]>,
+ [(set VR128:$dst, (X86vfpround (loadv4f64 addr:$src)))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
}
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
// whenever possible to avoid declaring two versions of each one.
let Predicates = [HasAVX, NoVLX] in {
+ // Select generic fpround v4f64 -> v4f32 onto VCVTPD2PSY: register source
+ // (rr) and v4f64 load source (rm).
+ def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
+ (VCVTPD2PSYrr VR256:$src)>;
+ def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+ (VCVTPD2PSYrm addr:$src)>;
+
// Match fpround and fpextend for 128/256-bit conversions
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86vfpround (v2f64 VR128:$src)))))),
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
INTR_TYPE_3OP_IMM8,
CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV,
- CVTPD2PS, CVTPD2PS_MASK, CVTPD2PS_RND_MASK,
+ CVTPD2PS_MASK,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK,
X86_INTRINSIC_DATA(avx_blendv_ps_256, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(avx_cmp_ps_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
- X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
+ X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, CVTPD2PS_MASK,
X86ISD::VFPROUND, X86ISD::VMFPROUND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_RND_MASK,
- ISD::FP_ROUND, X86ISD::VFPROUND_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK,