return DAG.getUNDEF(ASC->getValueType(0));
}
+bool
+SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // Offset folding stashes the offset in the relocation's addend, and we only
+  // emit relocations for globals in the global address space. Local (LDS)
+  // globals have no relocations, so the offset must stay as explicit address
+  // arithmetic (or be folded into the instruction's offset field instead).
+  if (GA->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+    return false;
+
+  return TargetLowering::isOffsetFoldingLegal(GA);
+}
+
SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
--- /dev/null
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -relocation-model=static < %s | FileCheck %s
+
+@lds = external addrspace(3) global [4 x i32]
+
+; Function Attrs: nounwind
+
+; Offset folding is an optimization done for global variables with relocations,
+; which allows you to store the offset in the r_addend of the relocation entry.
+; The offset is applied to the variable's address at link time, which eliminates
+; the need to emit shader instructions to do this calculation.
+; We don't use relocations for local memory, so we should never fold offsets
+; for local memory globals.
+
+; CHECK-LABEL: lds_no_offset:
+; CHECK: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:4
+define void @lds_no_offset() {
+entry:
+  ; Element 1 of @lds is base + 4 bytes; since LDS globals use no relocations,
+  ; the +4 should end up encoded in the ds_write's instruction offset field,
+  ; not folded into the global address node.
+ %ptr = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds, i32 0, i32 1
+ store i32 0, i32 addrspace(3)* %ptr
+ ret void
+}