From d9e2ef1edc3dfa8e6917148dabe58d0cba4a848a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 28 Mar 2019 11:34:21 +0000
Subject: [PATCH] [DAGCombiner] Fold truncate(build_vector(x,y)) ->
 build_vector(truncate(x),truncate(y))

If scalar truncates are free, attempt to pre-truncate a build_vector's
source operands.

Only attempt to do this before legalization, as we often end up with
truncations/extensions during build_vector lowering.
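
As an illustration (a hand-written sketch; the node numbers and types are
invented for this example, not compiler output): on a target where the
scalar truncate i64 -> i32 is free, such as x86-64, a DAG like

  t3: v2i64 = BUILD_VECTOR t1, t2
  t4: v2i32 = truncate t3

is now rewritten before legalization to

  t5: i32 = truncate t1
  t6: i32 = truncate t2
  t7: v2i32 = BUILD_VECTOR t5, t6

so the scalar sources are narrowed before the vector is ever built.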

Differential Revision: https://reviews.llvm.org/D59654

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357161 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp  | 16 +++++++++++++++-
 test/CodeGen/X86/bool-vector.ll           | 20 ++++++++++----------
 test/CodeGen/X86/known-signbits-vector.ll | 15 ++++-----------
 3 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ded4f04c57a..1c44a5d5ce3 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9895,10 +9895,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
+  EVT SrcVT = N0.getValueType();
   bool isLE = DAG.getDataLayout().isLittleEndian();
 
   // noop truncate
-  if (N0.getValueType() == N->getValueType(0))
+  if (SrcVT == VT)
     return N0;
 
   // fold (truncate (truncate x)) -> (truncate x)
@@ -9999,6 +10000,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     }
   }
 
+  // Attempt to pre-truncate BUILD_VECTOR sources.
+  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+    SDLoc DL(N);
+    EVT SVT = VT.getScalarType();
+    SmallVector<SDValue, 8> TruncOps;
+    for (const SDValue &Op : N0->op_values()) {
+      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
+      TruncOps.push_back(TruncOp);
+    }
+    return DAG.getBuildVector(VT, DL, TruncOps);
+  }
+
   // Fold a series of buildvector, bitcast, and truncate if possible.
   // For example fold
   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
diff --git a/test/CodeGen/X86/bool-vector.ll b/test/CodeGen/X86/bool-vector.ll
index 1afa3bc5eb7..7850d06bd7d 100644
--- a/test/CodeGen/X86/bool-vector.ll
+++ b/test/CodeGen/X86/bool-vector.ll
@@ -77,17 +77,17 @@ define i32 @PR15215_good(<4 x i32> %input) {
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    .cfi_offset %esi, -8
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    andl $1, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    andl $1, %ecx
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    andl $1, %edx
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    andl $1, %esi
-; X32-NEXT:    leal (%eax,%ecx,2), %eax
-; X32-NEXT:    leal (%eax,%edx,4), %eax
-; X32-NEXT:    leal (%eax,%esi,8), %eax
+; X32-NEXT:    andl $1, %edx
+; X32-NEXT:    andl $1, %ecx
+; X32-NEXT:    andl $1, %eax
+; X32-NEXT:    leal (%esi,%edx,2), %edx
+; X32-NEXT:    leal (%edx,%ecx,4), %ecx
+; X32-NEXT:    leal (%ecx,%eax,8), %eax
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll
index 0e6c067c159..1a6217f62f7 100644
--- a/test/CodeGen/X86/known-signbits-vector.ll
+++ b/test/CodeGen/X86/known-signbits-vector.ll
@@ -36,17 +36,10 @@ define <4 x float> @signbits_sext_v4i64_sitofp_v4f32(i8 signext %a0, i16 signext
 ;
 ; X64-LABEL: signbits_sext_v4i64_sitofp_v4f32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movslq %edi, %rax
-; X64-NEXT:    movslq %esi, %rsi
-; X64-NEXT:    movslq %edx, %rdx
-; X64-NEXT:    movslq %ecx, %rcx
-; X64-NEXT:    vmovq %rcx, %xmm0
-; X64-NEXT:    vmovq %rdx, %xmm1
-; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-NEXT:    vmovq %rsi, %xmm1
-; X64-NEXT:    vmovq %rax, %xmm2
-; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
+; X64-NEXT:    vmovd %edi, %xmm0
+; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
+; X64-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
 ; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = sext i8 %a0 to i64
-- 
2.50.1
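
Whether the combine fires is decided by the TargetLowering::isTruncateFree
hook queried in the new code above. A minimal sketch of such a hook
(simplified; the in-tree X86 override handles more cases) reports any
scalar integer narrowing as free, since e.g. i64 -> i32 is just a move of
the 32-bit subregister:

  // Simplified sketch, not the exact in-tree implementation: treat any
  // scalar integer narrowing as free.
  bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
    if (!VT1.isScalarInteger() || !VT2.isScalarInteger())
      return false;
    return VT1.getSizeInBits() > VT2.getSizeInBits();
  }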