From 360c172b79e8fd29d091149ee341a1e12517192e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 6 Aug 2019 21:43:15 +0000 Subject: [PATCH] [X86] Don't allow combineSIntToFP to create v2i32 vectors after type legalization. If we're after type legalization we should only be trying to turn v2i64 into v2i32. So bitcast to v4i32, shuffle the even elements together. Then use X86ISD::CVTSI2P. The alternative is to leave the v2i64 type alone and let it be scalarized. Hopefully keeping it packed is better. Fixes PR42905. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368091 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++++++---- test/CodeGen/X86/pr42905.ll | 26 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/X86/pr42905.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 43bf7289c16..826ce263aca 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -42848,6 +42848,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, } static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { // First try to optimize away the conversion entirely when it's // conditionally from a constant. Vectors only. 
@@ -42877,13 +42878,22 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, unsigned BitWidth = InVT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op0); if (NumSignBits >= (BitWidth - 31)) { - EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32); + EVT TruncVT = MVT::i32; if (InVT.isVector()) TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, InVT.getVectorNumElements()); SDLoc dl(N); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); - return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); + if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); + return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); + } + // If we're after legalize and the type is v2i32 we need to shuffle and + // use CVTSI2P. + assert(InVT == MVT::v2i64 && "Unexpected VT!"); + SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0); + SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast, + { 0, 2, -1, -1 }); + return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf); } } @@ -44481,7 +44491,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget); case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); - case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget); + case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, DCI, Subtarget); case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); case ISD::FADD: case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); diff --git a/test/CodeGen/X86/pr42905.ll b/test/CodeGen/X86/pr42905.ll new file mode 100644 index 00000000000..bb51aced225 --- /dev/null +++ b/test/CodeGen/X86/pr42905.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define <4 x double> @autogen_SD30452(i1 
%L230) { +; CHECK-LABEL: autogen_SD30452: +; CHECK: # %bb.0: # %BB +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [151829,151829] +; CHECK-NEXT: movq %xmm0, %rax +; CHECK-NEXT: cvtsi2sd %rax, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; CHECK-NEXT: movq %xmm2, %rax +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: cvtsi2sd %rax, %xmm2 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1 +; CHECK-NEXT: retq +BB: + %I = insertelement <4 x i64> zeroinitializer, i64 151829, i32 3 + %Shuff7 = shufflevector <4 x i64> %I, <4 x i64> zeroinitializer, <4 x i32> + br label %CF242 + +CF242: ; preds = %CF242, %BB + %FC125 = sitofp <4 x i64> %Shuff7 to <4 x double> + ret <4 x double> %FC125 +} -- 2.40.0