From df60d8e59e3d851a042867b676602f70d7887e3a Mon Sep 17 00:00:00 2001 From: Mark Searles Date: Mon, 16 Oct 2017 23:38:53 +0000 Subject: [PATCH] =?utf8?q?Use=20the=20return=20value=20of=20UpdateNodeOper?= =?utf8?q?ands();=20in=20some=20cases,=20UpdateNodeOperands()=20modifies?= =?utf8?q?=20the=20node=20in-place=20and=20using=20the=20return=20value=20?= =?utf8?q?isn=E2=80=99t=20strictly=20necessary.=20However,=20it=20does=20n?= =?utf8?q?ot=20necessarily=20modify=20the=20node,=20but=20may=20return=20a?= =?utf8?q?=20resultant=20node=20if=20it=20already=20exists=20in=20the=20DA?= =?utf8?q?G.=20See=20comments=20in=20UpdateNodeOperands().=20In=20that=20c?= =?utf8?q?ase,=20the=20return=20value=20must=20be=20used=20to=20avoid=20su?= =?utf8?q?ch=20scenarios=20as=20an=20infinite=20loop=20(node=20is=20assume?= =?utf8?q?d=20to=20have=20been=20updated,=20so=20added=20back=20to=20the?= =?utf8?q?=20worklist,=20and=20re-processed;=20however,=20node=20hasn?= =?utf8?q?=E2=80=99t=20changed=20so=20it=20is=20once=20again=20passed=20to?= =?utf8?q?=20UpdateNodeOperands(),=20assumed=20modified,=20added=20back=20?= =?utf8?q?to=20worklist;=20cycle=20infinitely=20repeats).?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D38466 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315957 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- lib/Target/AMDGPU/SIISelLowering.cpp | 3 +- .../AMDGPU/simplifydemandedbits-recursion.ll | 95 +++++++++++++++++++ 3 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a0be1c9f11f..03944321f79 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -469,7 +469,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, } NewOps.push_back(User->getOperand(i)); } - TLO.DAG.UpdateNodeOperands(User, NewOps); + User = TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. DCI.AddToWorklist(Op.getNode()); diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 82d1bc270a4..64a4d06e95c 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6507,8 +6507,7 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, Node->getOperand(i)), 0)); } - DAG.UpdateNodeOperands(Node, Ops); - return Node; + return DAG.UpdateNodeOperands(Node, Ops); } /// \brief Fold the instructions after selecting them. diff --git a/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll new file mode 100644 index 00000000000..f8077cd8e3a --- /dev/null +++ b/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll @@ -0,0 +1,95 @@ +; RUN: llc -march=amdgcn < %s | FileCheck %s + +; Check we can compile this bugpoint-reduced test without an +; infinite loop in TLI.SimplifyDemandedBits() due to failure +; to use return value of TLO.DAG.UpdateNodeOperands() + +; Check that code was generated; we know there will be +; a s_endpgm, so check for it. + +@0 = external unnamed_addr addrspace(3) global [462 x float], align 4 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.y() #0 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +; Function Attrs: nounwind readnone speculatable +declare float @llvm.fmuladd.f32(float, float, float) #0 + +; CHECK: s_endpgm +define amdgpu_kernel void @foo(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1, float addrspace(1)* noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 { +bb: + %tmp = tail call i32 @llvm.amdgcn.workitem.id.y() + %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp5 = and i32 %tmp, 15 + %tmp6 = mul nuw nsw i32 %tmp5, 21 + %tmp7 = sub i32 %tmp6, 0 + %tmp8 = add i32 %tmp7, 0 + %tmp9 = add i32 %tmp8, 0 + %tmp10 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 0 + br label %bb12 + +bb11: ; preds = %bb30 + br i1 undef, label %bb37, label %bb38 + +bb12: ; preds = %bb30, %bb + br i1 false, label %.preheader, label %.loopexit145 + +.loopexit145: ; preds = %.preheader, %bb12 + br label %bb13 + +bb13: ; preds = %.loopexit, %.loopexit145 + %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ] + %tmp15 = add nsw i32 %tmp14, -3 + %tmp16 = mul i32 %tmp14, 21 + br i1 undef, label %bb17, label %.loopexit + +bb17: ; preds = %bb13 + %tmp18 = mul i32 %tmp15, 224 + %tmp19 = add i32 undef, %tmp18 + br label %bb21 + +.loopexit: ; preds = %bb21, %bb13 + %tmp20 = add nuw nsw i32 %tmp14, 16 + br i1 undef, label %bb13, label %bb26 + +bb21: ; preds = %bb21, %bb17 + %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ] + %tmp23 = add i32 %tmp22, %tmp16 + %tmp24 = getelementptr inbounds float, float addrspace(3)* %tmp10, i32 %tmp23 + store float undef, float addrspace(3)* %tmp24, align 4 + %tmp25 = add nuw i32 %tmp22, 8 + br i1 undef, label %bb21, label %.loopexit + +bb26: ; preds = %.loopexit + br label %bb31 + +.preheader: ; preds = %.preheader, %bb12 + %tmp27 = phi i32 [ %tmp28, %.preheader ], [ undef, %bb12 ] + %tmp28 = add nuw i32 %tmp27, 128 + %tmp29 = icmp ult i32 %tmp28, 1568 + br i1 %tmp29, label %.preheader, label %.loopexit145 + +bb30: ; preds = %bb31 + br i1 undef, label %bb11, label %bb12 + +bb31: ; preds = %bb31, %bb26 + %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ] + %tmp33 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 %tmp32 + %tmp34 = load float, float addrspace(3)* %tmp33, align 4 + %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef) + %tmp36 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35) + br i1 undef, label %bb30, label %bb31 + +bb37: ; preds = %bb11 + br label %bb38 + +bb38: ; preds = %bb37, %bb11 + ret void +} + +attributes #0 = { nounwind readnone speculatable } + +!0 = !{i32 8, i32 16, i32 1} -- 2.50.1