From 2a46e93aedd2d4f2b3e5049e45e3a358cab1577f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 1 Feb 2017 22:00:37 +0000 Subject: [PATCH] Merging r293542: ------------------------------------------------------------------------ r293542 | arsenm | 2017-01-30 11:50:17 -0800 (Mon, 30 Jan 2017) | 7 lines LSR: Don't drop address space when type doesn't match For targets with different addressing modes in each address space, if this is dropped querying isLegalAddressingMode later with this will give a nonsense result, breaking the isLegalUse assertions. This is a candidate for the 4.0 release branch. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_40@293819 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 11 ++-- .../AMDGPU/preserve-addrspace-assert.ll | 54 +++++++++++++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 01728ae680d..194587a85e7 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -158,8 +158,9 @@ struct MemAccessTy { bool operator!=(MemAccessTy Other) const { return !(*this == Other); } - static MemAccessTy getUnknown(LLVMContext &Ctx) { - return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace); + static MemAccessTy getUnknown(LLVMContext &Ctx, + unsigned AS = UnknownAddressSpace) { + return MemAccessTy(Type::getVoidTy(Ctx), AS); } }; @@ -2279,8 +2280,10 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, // TODO: Be less conservative when the type is similar and can use the same // addressing modes. if (Kind == LSRUse::Address) { - if (AccessTy != LU.AccessTy) - NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext()); + if (AccessTy.MemTy != LU.AccessTy.MemTy) { + NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(), + AccessTy.AddrSpace); + } } // Conservatively assume HasBaseReg is true for now. diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll new file mode 100644 index 00000000000..b3b696d42c5 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll @@ -0,0 +1,54 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -loop-reduce %s | FileCheck %s + +; Test for assert resulting from inconsistent isLegalAddressingMode +; answers when the address space was dropped from the query. + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + +%0 = type { i32, double, i32, float } + +; CHECK-LABEL: @lsr_crash_preserve_addrspace_unknown_type( +; CHECK: %tmp4 = bitcast %0 addrspace(3)* %tmp to double addrspace(3)* +; CHECK: %scevgep5 = getelementptr double, double addrspace(3)* %tmp4, i32 1 +; CHEC: load double, double addrspace(3)* %scevgep5 + +; CHECK: %scevgep = getelementptr i32, i32 addrspace(3)* %tmp1, i32 4 +; CHECK:%tmp14 = load i32, i32 addrspace(3)* %scevgep +define void @lsr_crash_preserve_addrspace_unknown_type() #0 { +bb: + br label %bb1 + +bb1: ; preds = %bb17, %bb + %tmp = phi %0 addrspace(3)* [ undef, %bb ], [ %tmp18, %bb17 ] + %tmp2 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 1 + %tmp3 = load double, double addrspace(3)* %tmp2, align 8 + br label %bb4 + +bb4: ; preds = %bb1 + br i1 undef, label %bb8, label %bb5 + +bb5: ; preds = %bb4 + unreachable + +bb8: ; preds = %bb4 + %tmp9 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 0 + %tmp10 = load i32, i32 addrspace(3)* %tmp9, align 4 + %tmp11 = icmp eq i32 0, %tmp10 + br i1 %tmp11, label %bb12, label %bb17 + +bb12: ; preds = %bb8 + %tmp13 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 2 + %tmp14 = load i32, i32 addrspace(3)* %tmp13, align 4 + %tmp15 = icmp eq i32 0, %tmp14 + br i1 %tmp15, label %bb16, label %bb17 + +bb16: ; preds = %bb12 + unreachable + +bb17: ; preds = %bb12, %bb8 + %tmp18 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 2 + br label %bb1 +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } -- 2.50.1