From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 2 Dec 2016 18:12:53 +0000 (+0000)
Subject: AMDGPU: Implement isCheapAddrSpaceCast
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=76a17e03e013ee9f89893eeaad7962cf07aef117;p=llvm

AMDGPU: Implement isCheapAddrSpaceCast

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288523 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index ac5fddfd42d..ef61fc409f8 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -600,8 +600,8 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
 
 static bool isFlatGlobalAddrSpace(unsigned AS) {
   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
-    AS == AMDGPUAS::FLAT_ADDRESS ||
-    AS == AMDGPUAS::CONSTANT_ADDRESS;
+         AS == AMDGPUAS::FLAT_ADDRESS ||
+         AS == AMDGPUAS::CONSTANT_ADDRESS;
 }
 
 bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
@@ -609,6 +609,16 @@ bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
   return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
 }
 
+bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
+                                            unsigned DestAS) const {
+  // Flat -> private/local is a simple truncate.
+  // Flat -> global is no-op
+  if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
+    return true;
+
+  return isNoopAddrSpaceCast(SrcAS, DestAS);
+}
+
 bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
   const MemSDNode *MemNode = cast<MemSDNode>(N);
   const Value *Ptr = MemNode->getMemOperand()->getValue();
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 2e14f499a85..03846fd5473 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -130,6 +130,7 @@ public:
 
   bool isMemOpUniform(const SDNode *N) const;
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+  bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
 
   TargetLoweringBase::LegalizeTypeAction
   getPreferredVectorAction(EVT VT) const override;
diff --git a/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
new file mode 100644
index 00000000000..adeba26a6d4
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
@@ -0,0 +1,121 @@
+; RUN: opt -S -codegenprepare -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+
+; CHECK-LABEL: @no_sink_local_to_flat(
+; CHECK: addrspacecast
+; CHECK: br
+; CHECK-NOT: addrspacecast
+define i64 @no_sink_local_to_flat(i1 %pred, i64 addrspace(3)* %ptr) {
+  %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64 addrspace(4)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64 addrspace(3)* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(4)* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @no_sink_private_to_flat(
+; CHECK: addrspacecast
+; CHECK: br
+; CHECK-NOT: addrspacecast
+define i64 @no_sink_private_to_flat(i1 %pred, i64* %ptr) {
+  %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(4)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(4)* %ptr_cast
+  ret i64 %v2
+}
+
+
+; CHECK-LABEL: @sink_global_to_flat(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_global_to_flat(i1 %pred, i64 addrspace(1)* %ptr) {
+  %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64 addrspace(4)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64 addrspace(1)* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(4)* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @sink_flat_to_global(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_flat_to_global(i1 %pred, i64 addrspace(4)* %ptr) {
+  %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(1)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64 addrspace(4)* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(1)* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @sink_flat_to_constant(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_flat_to_constant(i1 %pred, i64 addrspace(4)* %ptr) {
+  %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(2)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64 addrspace(4)* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(2)* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @sink_flat_to_local(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_flat_to_local(i1 %pred, i64 addrspace(4)* %ptr) {
+  %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(3)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64 addrspace(4)* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(3)* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @sink_flat_to_private(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_flat_to_private(i1 %pred, i64 addrspace(4)* %ptr) {
+  %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64 addrspace(4)* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64* %ptr_cast
+  ret i64 %v2
+}