From: Matt Arsenault
Date: Fri, 28 Apr 2017 22:18:08 +0000 (+0000)
Subject: InferAddressSpaces: Infer from just addrspacecasts
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=afc9030f6722d1603b647372df68ac5caaecb520;p=llvm

InferAddressSpaces: Infer from just addrspacecasts

Eliminates some more cases where some subset of the addressing
computation remains flat. Some cases with addrspacecasts in nested
constant expressions are still left behind, however.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301704 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 9e2563879da..a9505a45eef 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -326,6 +326,9 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
         PushPtrOperand(Cmp->getOperand(0));
         PushPtrOperand(Cmp->getOperand(1));
       }
+    } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
+      if (!ASC->getType()->isVectorTy())
+        PushPtrOperand(ASC->getPointerOperand());
     }
   }
@@ -881,6 +884,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
        }
      }
 
+      if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
+        unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+        if (ASC->getDestAddressSpace() == NewAS) {
+          ASC->replaceAllUsesWith(NewV);
+          ASC->eraseFromParent();
+          continue;
+        }
+      }
+
       // Otherwise, replaces the use with flat(NewV).
       if (Instruction *I = dyn_cast<Instruction>(V)) {
         BasicBlock::iterator InsertPos = std::next(I->getIterator());
diff --git a/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll b/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
new file mode 100644
index 00000000000..73829e86527
--- /dev/null
+++ b/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+; Test that pure addrspacecast instructions not directly connected to
+; a memory operation are inferred.
+
+; CHECK-LABEL: @addrspacecast_gep_addrspacecast(
+; CHECK: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
+; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
+; CHECK-NEXT: ret void
+define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
+  %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+  %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
+  %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+  store i32 8, i32 addrspace(3)* %asc1, align 8
+  ret void
+}
+
+; CHECK-LABEL: @addrspacecast_to_memory(
+; CHECK: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
+; CHECK-NEXT: store volatile i32 addrspace(3)* %gep0, i32 addrspace(3)* addrspace(1)* undef
+; CHECK-NEXT: ret void
+define void @addrspacecast_to_memory(i32 addrspace(3)* %ptr) {
+  %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+  %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
+  %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+  store volatile i32 addrspace(3)* %asc1, i32 addrspace(3)* addrspace(1)* undef
+  ret void
+}
+
+; CHECK-LABEL: @multiuse_addrspacecast_gep_addrspacecast(
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+; CHECK-NEXT: store volatile i32 addrspace(4)* %1, i32 addrspace(4)* addrspace(1)* undef
+; CHECK-NEXT: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
+; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
+; CHECK-NEXT: ret void
+define void @multiuse_addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
+  %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+  store volatile i32 addrspace(4)* %asc0, i32 addrspace(4)* addrspace(1)* undef
+  %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
+  %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+  store i32 8, i32 addrspace(3)* %asc1, align 8
+  ret void
+}
diff --git a/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll b/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
index 6b94a74da35..8b482ba8a29 100644
--- a/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
+++ b/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
@@ -5,10 +5,20 @@
 
 @lds = internal unnamed_addr addrspace(3) global [648 x double] undef, align 8
 
+; CHECK-LABEL: @simplified_constexpr_gep_addrspacecast(
+; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
+define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
+  %gep0 = getelementptr inbounds double, double addrspace(4)* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double addrspace(4)*), i64 %idx0
+  %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+  store double 1.000000e+00, double addrspace(3)* %asc, align 8
+  ret void
+}
+
+; FIXME: Should be able to eliminate inner constantexpr addrspacecast.
 ; CHECK-LABEL: @constexpr_gep_addrspacecast(
-; CHECK: %gep0 = getelementptr inbounds double, double addrspace(4)* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double addrspace(4)*), i64 %idx0
-; CHECK-NEXT: %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
-; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %asc
+; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* addrspacecast (double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384) to double addrspace(3)*), i64 %idx0
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
 define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
   %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
   %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
@@ -18,10 +28,8 @@ define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
 
 ; CHECK-LABEL: @constexpr_gep_gep_addrspacecast(
 ; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
-; CHECK-NEXT: %1 = addrspacecast double addrspace(3)* %gep0 to double addrspace(4)*
-; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(4)* %1, i64 %idx1
-; CHECK-NEXT: %asc = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
-; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %asc, align 8
+; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* %gep0, i64 %idx1
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
 define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) {
   %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
   %gep1 = getelementptr inbounds double, double addrspace(4)* %gep0, i64 %idx1
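
For reference, here is the first new test case distilled into a plain input/output pair. The "after" function is reconstructed from the CHECK lines above rather than captured from an actual opt run, so read it as a sketch of the expected result:

; Before: the GEP and store address %ptr through flat (addrspace(4)) casts.
define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
  %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
  %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
  %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
  store i32 8, i32 addrspace(3)* %asc1, align 8
  ret void
}

; After -infer-address-spaces: the GEP is rewritten to operate on
; addrspace(3) directly. %asc1 then casts to the address space its
; replacement value already has, so the new rewriteWithNewAddressSpaces
; hunk replaces its uses and erases it; %asc0 is left dead and cleaned up.
define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
  %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
  store i32 8, i32 addrspace(3)* %gep0, align 8
  ret void
}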