return DAG.getUNDEF(ASC->getValueType(0));
}
+bool
+SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // Offset folding stashes the offset in the relocation's addend, and we only
+  // emit relocations for globals in the global address space. Local (LDS)
+  // globals have no relocations, so the offset must stay as explicit address
+  // arithmetic (or be folded into the instruction's offset field instead).
+  if (GA->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+    return false;
+
+  return TargetLowering::isOffsetFoldingLegal(GA);
+}
+
SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
--- /dev/null
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -relocation-model=static < %s | FileCheck %s
+
+@lds = external addrspace(3) global [4 x i32]
+
+; Function Attrs: nounwind
+
+; Offset folding is an optimization done for global variables with relocations,
+; which allows you to store the offset in the r_addend of the relocation entry.
+; The offset is applied to the variable's address at link time, which eliminates
+; the need to emit shader instructions to do this calculation.
+; We don't use relocations for local memory, so we should never fold offsets
+; for local memory globals.
+
+; CHECK-LABEL: lds_no_offset:
+; CHECK: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:4
+define void @lds_no_offset() {
+entry:
+  ; Element 1 of @lds is base + 4 bytes; since LDS globals use no relocations,
+  ; the +4 should end up encoded in the ds_write's instruction offset field,
+  ; not folded into the global address node.
+ %ptr = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds, i32 0, i32 1
+ store i32 0, i32 addrspace(3)* %ptr
+ ret void
+}