NODE_NAME_CASE(CVT_F32_UBYTE3)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
+ NODE_NAME_CASE(PC_ADD_REL_OFFSET)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(INTERP_MOV)
INTERP_MOV,
INTERP_P1,
INTERP_P2,
+ PC_ADD_REL_OFFSET,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
+ const MCExpr *SymExpr = MCSymbolRefExpr::create(Sym, Ctx);
+ const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
+ MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+ MCOp = MCOperand::createExpr(Expr);
break;
}
case MachineOperand::MO_ExternalSymbol: {
return 2;
case FK_SecRel_4:
case FK_Data_4:
+ case FK_PCRel_4:
return 4;
case FK_SecRel_8:
case FK_Data_8:
break;
}
- case AMDGPU::fixup_si_rodata: {
- uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
- // We emit constant data at the end of the text section and generate its
- // address using the following code sequence:
- // s_getpc_b64 s[0:1]
- // s_add_u32 s0, s0, $symbol
- // s_addc_u32 s1, s1, 0
- //
- // s_getpc_b64 returns the address of the s_add_u32 instruction and then
- // the fixup replaces $symbol with a literal constant, which is a
- // pc-relative offset from the encoding of the $symbol operand to the
- // constant data.
- //
- // What we want here is an offset from the start of the s_add_u32
- // instruction to the constant data, but since the encoding of $symbol
- // starts 4 bytes after the start of the add instruction, we end up
- // with an offset that is 4 bytes too small. This requires us to
- // add 4 to the fixup value before applying it.
- *Dst = Value + 4;
- break;
- }
default: {
// FIXME: Copied from AArch64
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_si_rodata", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
};
if (Kind < FirstTargetFixupKind)
class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
bool Is64Bit;
+ bool HasRelocationAddend;
public:
- ELFAMDGPUAsmBackend(const Target &T, bool Is64Bit) :
- AMDGPUAsmBackend(T), Is64Bit(Is64Bit) { }
+ ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) :
+ AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn),
+ HasRelocationAddend(TT.getOS() == Triple::AMDHSA) { }
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
- return createAMDGPUELFObjectWriter(Is64Bit, OS);
+ return createAMDGPUELFObjectWriter(Is64Bit, HasRelocationAddend, OS);
}
};
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU) {
// Use 64-bit ELF for amdgcn
- return new ELFAMDGPUAsmBackend(T, TT.getArch() == Triple::amdgcn);
+ return new ELFAMDGPUAsmBackend(T, TT);
}
class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
public:
- AMDGPUELFObjectWriter(bool Is64Bit);
+ AMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend);
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override {
} // End anonymous namespace
-AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit)
- : MCELFObjectTargetWriter(Is64Bit, ELF::ELFOSABI_AMDGPU_HSA,
- ELF::EM_AMDGPU, false) { }
+AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(Is64Bit,
+ ELF::ELFOSABI_AMDGPU_HSA,
+ ELF::EM_AMDGPU,
+ HasRelocationAddend) { }
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, raw_pwrite_stream &OS) {
- MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(Is64Bit);
+
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit,
+ bool HasRelocationAddend,
+ raw_pwrite_stream &OS) {
+ MCELFObjectTargetWriter *MOTW =
+ new AMDGPUELFObjectWriter(Is64Bit, HasRelocationAddend);
return createELFObjectWriter(MOTW, OS, true);
}
/// 16-bit PC relative fixup for SOPP branch instructions.
fixup_si_sopp_br = FirstTargetFixupKind,
- /// fixup for global addresses with constant initializers
- fixup_si_rodata,
-
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
const Triple &TT, StringRef CPU);
MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit,
+ bool HasRelocationAddend,
raw_pwrite_stream &OS);
} // End llvm namespace
return MRI.getEncodingValue(MO.getReg());
if (MO.isExpr()) {
- const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
- const MCSymbol &Sym = Expr->getSymbol();
+ const MCSymbolRefExpr *Expr = dyn_cast<MCSymbolRefExpr>(MO.getExpr());
MCFixupKind Kind;
- if (Sym.isExternal())
+ if (Expr && Expr->getSymbol().isExternal())
Kind = FK_Data_4;
else
- Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
- Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
+ Kind = FK_PCRel_4;
+ Fixups.push_back(MCFixup::create(4, MO.getExpr(), Kind, MI.getLoc()));
}
// Figure out the operand number, needed for isSrcOperand check
return DAG.getUNDEF(ASC->getValueType(0));
}
+SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
+ SDValue Op,
+ SelectionDAG &DAG) const {
+ GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
+
+ if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
+
+ SDLoc DL(GSD);
+ const GlobalValue *GV = GSD->getGlobal();
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace());
+
+ // In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
+ // lowered to the following code sequence:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol
+ // s_addc_u32 s1, s1, 0
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // a fixup or relocation is emitted to replace $symbol with a literal
+ // constant, which is a pc-relative offset from the encoding of the $symbol
+ // operand to the global variable.
+ //
+ // What we want here is an offset from the value returned by s_getpc
+ // (which is the address of the s_add_u32 instruction) to the global
+ // variable, but since the encoding of $symbol starts 4 bytes after the start
+ // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
+ // small. This requires us to add 4 to the global variable offset in order to
+ // compute the correct address.
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32,
+ GSD->getOffset() + 4);
+ return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, GA);
+}
+
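The comment block above carries the whole argument for the "+4" bias, so here is a small, purely editorial C++ sketch (not part of the patch) that re-derives the arithmetic with invented addresses; the 0x104/0x108/0x200 values and the variable names are assumptions made only for illustration.

// Editorial sketch: re-derives the "+4" bias from LowerGlobalAddress
// using made-up addresses.
#include <cassert>
#include <cstdint>

int main() {
  // Assume the s_add_u32 produced for PC_ADD_REL_OFFSET sits at 0x104;
  // s_getpc_b64 returns this address.  The 32-bit $symbol literal occupies
  // the second dword of s_add_u32, i.e. 0x108, and the global variable we
  // want to address lives at 0x200.  All three addresses are invented.
  const uint64_t GetPCResult = 0x104;               // address of s_add_u32
  const uint64_t SymbolEncoding = GetPCResult + 4;  // where the literal is encoded
  const uint64_t GlobalVar = 0x200;

  // A plain pc-relative fixup resolves $symbol relative to the literal's own
  // location, so without any bias s_add_u32 would add:
  const uint64_t UnbiasedLiteral = GlobalVar - SymbolEncoding;

  // s_add_u32 adds that literal to the s_getpc_b64 result, which is 4 bytes
  // earlier than the literal, so the computed address is 4 bytes short...
  assert(GetPCResult + UnbiasedLiteral == GlobalVar - 4);

  // ...which is why the lowering folds +4 into the target global address.
  assert(GetPCResult + (UnbiasedLiteral + 4) == GlobalVar);
  return 0;
}
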
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
const SDLoc &DL, SDValue V) const {
// We can't use S_MOV_B32 directly, because there is no way to specify m0 as
class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &DL,
SDValue Chain, unsigned Offset, bool Signed) const;
-
+ SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
+ SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
MVT VT, unsigned Offset) const;
break;
}
- case AMDGPU::SI_CONSTDATA_PTR: {
+ case AMDGPU::SI_PC_ADD_REL_OFFSET: {
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
MachineFunction &MF = *MBB.getParent();
def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
+def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
+ SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
+>;
+
//===----------------------------------------------------------------------===//
// PatFrags for FLAT instructions
//===----------------------------------------------------------------------===//
let ParserMatchClass = SoppBrTarget;
}
-def const_ga : Operand<iPTR>;
+def si_ga : Operand<iPTR>;
def InterpSlot : Operand<i32> {
let PrintMethod = "printInterpSlot";
let Defs = [SCC] in {
-def SI_CONSTDATA_PTR : InstSI <
+def SI_PC_ADD_REL_OFFSET : InstSI <
(outs SReg_64:$dst),
- (ins const_ga:$ptr),
- "", [(set SReg_64:$dst, (i64 (AMDGPUconstdata_ptr (tglobaladdr:$ptr))))]
+ (ins si_ga:$ptr),
+ "", [(set SReg_64:$dst, (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr))))]
> {
let SALU = 1;
}