From: Wolfgang Pieb Date: Mon, 26 Jun 2017 23:05:51 +0000 (+0000) Subject: DAGCombine: Make sure we only eliminate trunc/extend when the scales of truncation... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ffee4824e9e7c21fd88df7a6e3665afa2b427a2b;p=llvm DAGCombine: Make sure we only eliminate trunc/extend when the scales of truncation and extension match. This fixes PR33368. Reviewer: rksimon Differential Revision: https://reviews.llvm.org/D34069 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306345 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d02dcb6f443..d1a5a98607c 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15013,6 +15013,11 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, unsigned NumElts = VT.getVectorNumElements(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); + unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits(); + + if (ExtDstSizeInBits % ExtSrcSizeInBits != 0) + return SDValue(); + unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits; // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> @@ -15034,11 +15039,10 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, if (EltSizeInBits != ExtSrcSizeInBits) return SDValue(); - // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for - // power-of-2 truncations as they are the most likely. - for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) - if (isTruncate(Scale)) - return DAG.getBitcast(VT, N00); + // We can remove *extend_vector_inreg only if the truncation happens at + // the same scale as the extension. 
+ if (isTruncate(ExtScale)) + return DAG.getBitcast(VT, N00); return SDValue(); } diff --git a/test/CodeGen/X86/vector-truncate-combine.ll b/test/CodeGen/X86/vector-truncate-combine.ll new file mode 100644 index 00000000000..1a6dac8fa6e --- /dev/null +++ b/test/CodeGen/X86/vector-truncate-combine.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=x86_64-- -O2 -start-after=stack-protector -stop-before=loops %s -o - | FileCheck %s + +; This test verifies the fix for PR33368. +; +; The expected outcome of the operation is to store bytes 0 and 2 of the incoming +; parameter into c2 (a 2 x i8 vector). DAGCombine converts shuffles into a +; sequence of extend and subsequent truncate operations. The bug was that an extension +; by 4 followed by a truncation by 8 was completely eliminated. + +; The test checks for the correct sequence of operations that results from the +; preservation of the extend/truncate operations mentioned above (2 extend and +; 3 truncate instructions). +; +; NOTE: This operation could be collapsed into a single truncate. Once that is done +; this test will have to be adjusted. + +; CHECK: PUNPCKLBWrr +; CHECK: PUNPCKLWDrr +; CHECK: PACKUSWBrr +; CHECK: PACKUSWBrr +; CHECK: PACKUSWBrr + +define void @test(double %vec.coerce) local_unnamed_addr { +entry: + %c2 = alloca <2 x i8>, align 2 + %0 = bitcast double %vec.coerce to <8 x i8> + %1 = shufflevector <8 x i8> %0, <8 x i8> undef, <4 x i32> + %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <2 x i32> + store volatile <2 x i8> %2, <2 x i8>* %c2, align 2 + br label %if.end + +if.end: + %3 = bitcast <2 x i8> %2 to i16 + ret void +}