This is very related to the bug fixed in r174431. The problem is that
SelectionDAG does not include alignment in the uniquing of loads and
stores. When an otherwise no-op DAGCombine would increase the alignment
of a load or store, the original node would be returned (with the
alignment increased), which would cause the node not to be processed by
any further DAGCombines.
I don't have a direct testcase for this that manifests on an in-tree
target, but I did see some noise in the tests for other targets and have
updated them for it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232780
91177308-0d34-0410-b5e6-
96231b3b80d8
LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), Align, LD->getAAInfo());
- return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ if (NewLoad.getNode() != N)
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment())
- return DAG.getTruncStore(Chain, SDLoc(N), Value,
+ if (Align > ST->getAlignment()) {
+ SDValue NewStore =
+ DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align,
ST->getAAInfo());
+ if (NewStore.getNode() != N)
+ return CombineTo(ST, NewStore, true);
+ }
}
}
define void @vst(i8* %m, [4 x i64] %v) {
entry:
; CHECK: vst:
-; CHECK: VST1d64Q %R{{[0-9]+}}<kill>, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use>
+; CHECK: VST1d64Q %R{{[0-9]+}}<kill>, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use,kill>
%v0 = extractvalue [4 x i64] %v, 0
%v1 = extractvalue [4 x i64] %v, 1
; DARWIN32: lwz r[[REG3:[0-9]+]], 108(r1)
; DARWIN32: mr r[[REG2:[0-9]+]], r[[REG4]]
; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
-; DARWIN32: stw r[[REG4]], -[[OFFSET1:[0-9]+]]
+; DARWIN32: stw r[[REG2]], -[[OFFSET1:[0-9]+]]
; DARWIN32: stw r[[REG3]], -[[OFFSET2:[0-9]+]]
; DARWIN32: lwz r[[REG1]], -[[OFFSET1]]
; DARWIN32: lwz r[[REG1]], -[[OFFSET2]]
; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind {
-; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128]
-; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2
-; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32]
+; CHECK: lsls r[[ADDR:[0-9]+]], r[[ADDR]], #2
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128], r[[ADDR]]
+; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[SOURCE]]:32]
; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0]
%val = extractelement <4 x i32> %phitmp, i32 %lane
%r1 = insertelement <4 x i32> undef, i32 %val, i32 1