From: Owen Anderson Date: Thu, 19 Mar 2015 22:48:57 +0000 (+0000) Subject: Fix a nasty bug in DAGCombine of STORE nodes. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8154ef7589acc76850d6e430adfc0d60e82d94cf;p=llvm Fix a nasty bug in DAGCombine of STORE nodes. This is closely related to the bug fixed in r174431. The problem is that SelectionDAG does not include alignment in the uniquing of loads and stores. When an otherwise no-op DAGCombine would increase the alignment of a load or store, the original node would be returned (with the alignment increased), which would cause the node not to be processed by any further DAGCombines. I don't have a direct testcase for this that manifests on an in-tree target, but I did see some noise in the tests for other targets and have updated them for it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232780 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8ea1d41b245..702147155ba 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9072,7 +9072,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), Align, LD->getAAInfo()); - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + if (NewLoad.getNode() != N) + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } @@ -10703,11 +10704,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. 
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, SDLoc(N), Value, + if (Align > ST->getAlignment()) { + SDValue NewStore = + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, ST->getAAInfo()); + if (NewStore.getNode() != N) + return CombineTo(ST, NewStore, true); + } } } diff --git a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll index dc7f3081e03..ef575f4c41e 100644 --- a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll +++ b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll @@ -4,7 +4,7 @@ define void @vst(i8* %m, [4 x i64] %v) { entry: ; CHECK: vst: -; CHECK: VST1d64Q %R{{[0-9]+}}, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}} +; CHECK: VST1d64Q %R{{[0-9]+}}, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}} %v0 = extractvalue [4 x i64] %v, 0 %v1 = extractvalue [4 x i64] %v, 1 diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll index 1a25f4bfbfe..e899cfa4ce0 100644 --- a/test/CodeGen/PowerPC/anon_aggr.ll +++ b/test/CodeGen/PowerPC/anon_aggr.ll @@ -165,7 +165,7 @@ unequal: ; DARWIN32: lwz r[[REG3:[0-9]+]], 108(r1) ; DARWIN32: mr r[[REG2:[0-9]+]], r[[REG4]] ; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]] -; DARWIN32: stw r[[REG4]], -[[OFFSET1:[0-9]+]] +; DARWIN32: stw r[[REG2]], -[[OFFSET1:[0-9]+]] ; DARWIN32: stw r[[REG3]], -[[OFFSET2:[0-9]+]] ; DARWIN32: lwz r[[REG1]], -[[OFFSET1]] ; DARWIN32: lwz r[[REG1]], -[[OFFSET2]] diff --git a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll index 937ecc0d667..5936b780300 100644 --- a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll +++ 
b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind { -; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128] -; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2 -; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32] +; CHECK: lsls r[[ADDR:[0-9]+]], r[[ADDR]], #2 +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128], r[[ADDR]] +; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[SOURCE]]:32] ; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0] %val = extractelement <4 x i32> %phitmp, i32 %lane %r1 = insertelement <4 x i32> undef, i32 %val, i32 1