From: Tim Northover Date: Tue, 27 Aug 2019 10:21:11 +0000 (+0000) Subject: AArch64: avoid creating cycle in DAG for post-increment NEON ops. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0b15c50b407d3d21b0cda46d50fe6da303258cd3;p=llvm AArch64: avoid creating cycle in DAG for post-increment NEON ops. Inserting a value into Visited has the effect of terminating a search for predecessors if that node is seen. This is legitimate for the base address, and acts as a slight performance optimization, but the vector-building node can be part of a legitimate cycle so we shouldn't stop searching there. PR43056. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370036 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index be248ee898c..6d2f363858e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10694,7 +10694,7 @@ static SDValue performPostLD1Combine(SDNode *N, // are predecessors to each other or the Vector. 
SmallPtrSet Visited; SmallVector Worklist; - Visited.insert(N); + Visited.insert(Addr.getNode()); Worklist.push_back(User); Worklist.push_back(LD); Worklist.push_back(Vector.getNode()); diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 8b6a4cae7ed..f3ac9b21f53 100644 --- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -6319,3 +6319,22 @@ define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* store <8 x i8> %sub, <8 x i8>* %p ret void } + +define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) { +; CHECK-LABEL: test_inc_cycle: +; CHECK: ld1.s { v0 }[0], [x0]{{$}} + + %elt = load i32, i32* %in + %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0 + + ; %inc cannot be %elt directly because we check that the load is only + ; used by the insert before trying to form post-inc. + %inc.vec = bitcast <4 x i32> %newvec to <2 x i64> + %inc = extractelement <2 x i64> %inc.vec, i32 0 + %newaddr = getelementptr i32, i32* %in, i64 %inc + store i32* %newaddr, i32** @var + + ret <4 x i32> %newvec +} + +@var = global i32* null