From 1e95d74db438a7e6bef0893aaa64be2cdbb7f1a1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Jan 2019 20:14:09 +0000 Subject: [PATCH] [X86] Use X86ISD::VFPROUND instead of ISD::FP_ROUND for 256 and 512 bit cvtpd2ps intrinsics. Summary: Use X86ISD::VFPROUND in the instruction isel patterns. Add new patterns for ISD::FP_ROUND to maintain support for fptrunc in IR. In the process I found a couple duplicate isel patterns which I also deleted in this patch. Reviewers: RKSimon, spatel Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D56991 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351762 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 29 ---------- lib/Target/X86/X86InstrAVX512.td | 75 +++++++++++++++++++++---- lib/Target/X86/X86InstrFragmentsSIMD.td | 2 +- lib/Target/X86/X86InstrSSE.td | 9 ++- lib/Target/X86/X86IntrinsicsInfo.h | 8 +-- 5 files changed, 76 insertions(+), 47 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6bcb28ac4a0..a800a42818b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -21881,35 +21881,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // first. return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case CVTPD2PS: - // ISD::FP_ROUND has a second argument that indicates if the truncation - // does not change the value. Set it to 0 since it can change. - return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1), - DAG.getIntPtrConstant(0, dl)); - case CVTPD2PS_RND_MASK: { - SDValue Src = Op.getOperand(1); - SDValue PassThru = Op.getOperand(2); - SDValue Mask = Op.getOperand(3); - // We add rounding mode to the Node when - // - RM Opcode is specified and - // - RM is not "current direction". - unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; - if (IntrWithRoundingModeOpcode != 0) { - SDValue Rnd = Op.getOperand(4); - if (!isRoundModeCurDirection(Rnd)) { - return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, - dl, Op.getValueType(), - Src, Rnd), - Mask, PassThru, Subtarget, DAG); - } - } - assert(IntrData->Opc0 == ISD::FP_ROUND && "Unexpected opcode!"); - // ISD::FP_ROUND has a second argument that indicates if the truncation - // does not change the value. Set it to 0 since it can change. - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, - DAG.getIntPtrConstant(0, dl)), - Mask, PassThru, Subtarget, DAG); - } case FPCLASSS: { SDValue Src1 = Op.getOperand(1); SDValue Imm = Op.getOperand(2); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 9509a049d82..90ccf1f5eb9 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -8092,7 +8092,7 @@ multiclass avx512_cvtps2pd opc, string OpcodeStr, // Truncate Double to Float multiclass avx512_cvtpd2ps opc, string OpcodeStr, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; } @@ -8100,7 +8100,7 @@ multiclass avx512_cvtpd2ps opc, string OpcodeStr, X86SchedWriteWidths sc defm Z128 : avx512_vcvt_fp, EVEX_V128; - defm Z256 : avx512_vcvt_fp, EVEX_V256; def : InstAlias, defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>, PS, EVEX_CD8<32, CD8VH>; -def : Pat<(v8f64 (extloadv8f32 addr:$src)), - (VCVTPS2PDZrm addr:$src)>; +let Predicates = [HasAVX512] in { + def : Pat<(v8f32 (fpround (v8f64 VR512:$src))), + (VCVTPD2PSZrr VR512:$src)>; + def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))), + VR256X:$src0), + (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>; + def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))), + v8f32x_info.ImmAllZerosV), + (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>; + + def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))), + (VCVTPD2PSZrm addr:$src)>; + def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))), + VR256X:$src0), + (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>; + def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))), + v8f32x_info.ImmAllZerosV), + (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>; + + def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))), + (VCVTPD2PSZrmb addr:$src)>; + def : Pat<(vselect VK8WM:$mask, + (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))), + (v8f32 VR256X:$src0)), + (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>; + def : Pat<(vselect VK8WM:$mask, + (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))), + v8f32x_info.ImmAllZerosV), + (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>; + + def : Pat<(v8f64 (extloadv8f32 addr:$src)), + (VCVTPS2PDZrm addr:$src)>; +} let Predicates = [HasVLX] in { + def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))), + (VCVTPD2PSZ256rr VR256X:$src)>; + def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))), + VR128X:$src0), + (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; + def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))), + v4f32x_info.ImmAllZerosV), + (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>; + + def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))), + (VCVTPD2PSZ256rm addr:$src)>; + def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))), + VR128X:$src0), + (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; + def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))), + v4f32x_info.ImmAllZerosV), + (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>; + + def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + (VCVTPD2PSZ256rmb addr:$src)>; + def : Pat<(vselect VK4WM:$mask, + (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + VR128X:$src0), + (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; + def : Pat<(vselect VK4WM:$mask, + (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + v4f32x_info.ImmAllZerosV), + (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>; + def : Pat<(X86vzmovl (v2f64 (bitconvert (v4f32 (X86vfpround (v2f64 VR128X:$src)))))), (VCVTPD2PSZ128rr VR128X:$src)>; @@ -8800,13 +8860,6 @@ let Predicates = [HasAVX512, HasVLX] in { (VCVTUDQ2PDZ128rm addr:$src)>; } -let Predicates = [HasAVX512] in { - def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))), - (VCVTPD2PSZrm addr:$src)>; - def : Pat<(v8f64 (extloadv8f32 addr:$src)), - (VCVTPS2PDZrm addr:$src)>; -} - let Predicates = [HasDQI, HasVLX] in { def : Pat<(X86vzmovl (v2f64 (bitconvert (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 6b9b28b0284..cde6337d576 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -126,7 +126,7 @@ def X86vfpext : SDNode<"X86ISD::VFPEXT", def X86vfpround: SDNode<"X86ISD::VFPROUND", SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, f64>, - SDTCisSameSizeAs<0, 1>]>>; + SDTCisOpSmallerThanOp<0, 1>]>>; def X86froundRnd: SDNode<"X86ISD::VFPROUNDS_RND", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 64b17f194cc..5e528ef29bb 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1769,11 +1769,11 @@ def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", let Predicates = [HasAVX, NoVLX] in { def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (fpround VR256:$src))]>, + [(set VR128:$dst, (X86vfpround VR256:$src))]>, VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (fpround (loadv4f64 addr:$src)))]>, + [(set VR128:$dst, (X86vfpround (loadv4f64 addr:$src)))]>, VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG; } def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}", @@ -1795,6 +1795,11 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // whenever possible to avoid declaring two versions of each one. let Predicates = [HasAVX, NoVLX] in { + def : Pat<(v4f32 (fpround (v4f64 VR256:$src))), + (VCVTPD2PSYrr VR256:$src)>; + def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))), + (VCVTPD2PSYrm addr:$src)>; + // Match fpround and fpextend for 128/256-bit conversions def : Pat<(X86vzmovl (v2f64 (bitconvert (v4f32 (X86vfpround (v2f64 VR128:$src)))))), diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 6ff3c65b1ac..b9afc2436d1 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -23,7 +23,7 @@ enum IntrinsicType : uint16_t { INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, INTR_TYPE_3OP_IMM8, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, - CVTPD2PS, CVTPD2PS_MASK, CVTPD2PS_RND_MASK, + CVTPD2PS_MASK, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK, @@ -343,7 +343,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_blendv_ps_256, BLENDV, X86ISD::BLENDV, 0), X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0), X86_INTRINSIC_DATA(avx_cmp_ps_256, INTR_TYPE_3OP, X86ISD::CMPP, 0), - X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0), + X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,INTR_TYPE_1OP, X86ISD::VFPROUND, 0), X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0), @@ -515,8 +515,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND), X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, CVTPD2PS_MASK, X86ISD::VFPROUND, X86ISD::VMFPROUND), - X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_RND_MASK, - ISD::FP_ROUND, X86ISD::VFPROUND_RND), + X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, INTR_TYPE_1OP_MASK, + X86ISD::VFPROUND, X86ISD::VFPROUND_RND), X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK, -- 2.50.1