MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
+ if (ST.isAmdCodeObjectV2(MF)) {
PreloadedPrivateBufferReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
}
// Use relocations to get the pointer, and setup the other bits manually.
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
- if (MFI->hasPrivateMemoryInputPtr()) {
+ if (MFI->hasImplicitBufferPtr()) {
unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
BuildMI(MBB, I, DL, Mov64, Rsrc01)
- .addReg(PreloadedPrivateBufferReg)
+ .addReg(MFI->getImplicitBufferPtrUserSGPR())
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
} else {
const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
MachineMemOperand::MODereferenceable,
0, 0);
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
- .addReg(PreloadedPrivateBufferReg)
+ .addReg(MFI->getImplicitBufferPtrUserSGPR())
.addImm(0) // offset
.addImm(0) // glc
.addMemOperand(MMO)
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) {
- if (Info.hasPrivateMemoryInputPtr()) {
- unsigned PrivateMemoryPtrReg = Info.addPrivateMemoryPtr(TRI);
- MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SGPR_64RegClass);
- CCInfo.AllocateReg(PrivateMemoryPtrReg);
+ if (Info.hasImplicitBufferPtr()) {
+ unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
+ MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
switch (IntrinsicID) {
case Intrinsic::amdgcn_implicit_buffer_ptr: {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ if (getSubtarget()->isAmdCodeObjectV2(MF))
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
+
+ unsigned Reg = TRI->getPreloadedValue(MF,
+ SIRegisterInfo::IMPLICIT_BUFFER_PTR);
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
}
case Intrinsic::amdgcn_dispatch_ptr:
WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false),
- PrivateMemoryInputPtr(false) {
+ ImplicitBufferPtr(false) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
DispatchID = true;
} else if (ST.isMesaGfxShader(MF)) {
if (HasStackObjects || MaySpill)
- PrivateMemoryInputPtr = true;
+ ImplicitBufferPtr = true;
}
// We don't need to worry about accessing spills with flat instructions.
return FlatScratchInitUserSGPR;
}
// Allocates the user SGPR pair that carries the implicit buffer pointer
// (this change renames addPrivateMemoryPtr to addImplicitBufferPtr).
// The register chosen is the SReg_64 super-register whose sub0 is the next
// free user SGPR; it is recorded in ImplicitBufferPtrUserSGPR and returned.
-unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
- PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
+unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
+ ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
// The pointer occupies a 64-bit register, so two user SGPRs are consumed.
NumUserSGPRs += 2;
- return PrivateMemoryPtrUserSGPR;
+ return ImplicitBufferPtrUserSGPR;
}
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
unsigned StackPtrOffsetReg;
// Input registers for non-HSA ABI
- unsigned PrivateMemoryPtrUserSGPR;
+ unsigned ImplicitBufferPtrUserSGPR;
// Input registers setup for the HSA ABI.
// User SGPRs in allocation order.
// Private memory buffer
// Compute directly in sgpr[0:1]
// Other shaders indirect 64-bits at sgpr[0:1]
- bool PrivateMemoryInputPtr : 1;
+ bool ImplicitBufferPtr : 1;
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
unsigned addDispatchID(const SIRegisterInfo &TRI);
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
- unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
+ unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
// Add system SGPRs.
unsigned addWorkGroupIDX() {
return WorkItemIDZ;
}
// Returns the ImplicitBufferPtr flag (renamed here from PrivateMemoryInputPtr).
// The constructor sets it for Mesa graphics shaders when the function has
// stack objects or may spill.
- bool hasPrivateMemoryInputPtr() const {
- return PrivateMemoryInputPtr;
+ bool hasImplicitBufferPtr() const {
+ return ImplicitBufferPtr;
}
unsigned getNumUserSGPRs() const {
return QueuePtrUserSGPR;
}
// Returns the user SGPR recorded by addImplicitBufferPtr() for the implicit
// buffer pointer (renamed here from getPrivateMemoryPtrUserSGPR).
- unsigned getPrivateMemoryPtrUserSGPR() const {
- return PrivateMemoryPtrUserSGPR;
+ unsigned getImplicitBufferPtrUserSGPR() const {
+ return ImplicitBufferPtrUserSGPR;
}
bool hasSpilledSGPRs() const {
case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
- if (ST.isAmdCodeObjectV2(MF)) {
- assert(MFI->hasPrivateSegmentBuffer());
- return MFI->PrivateSegmentBufferUserSGPR;
- }
- assert(MFI->hasPrivateMemoryInputPtr());
- return MFI->PrivateMemoryPtrUserSGPR;
+ assert(MFI->hasPrivateSegmentBuffer());
+ return MFI->PrivateSegmentBufferUserSGPR;
+ case SIRegisterInfo::IMPLICIT_BUFFER_PTR:
+ assert(MFI->hasImplicitBufferPtr());
+ return MFI->ImplicitBufferPtrUserSGPR;
case SIRegisterInfo::KERNARG_SEGMENT_PTR:
assert(MFI->hasKernargSegmentPtr());
return MFI->KernargSegmentPtrUserSGPR;
WORKGROUP_ID_Y = 11,
WORKGROUP_ID_Z = 12,
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
+ IMPLICIT_BUFFER_PTR = 15,
// VGPRS:
- FIRST_VGPR_VALUE = 15,
+ FIRST_VGPR_VALUE = 16,
WORKITEM_ID_X = FIRST_VGPR_VALUE,
- WORKITEM_ID_Y = 16,
- WORKITEM_ID_Z = 17
+ WORKITEM_ID_Y = 17,
+ WORKITEM_ID_Z = 18
};
/// \brief Returns the physical register that \p Value is stored in.
--- /dev/null
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa < %s 2>&1 | FileCheck -check-prefix=ERROR %s
+
+; ERROR: in function test_kernel{{.*}}: non-hsa intrinsic with hsa target
; amdgpu_kernel case: loads one i32 through the pointer returned by
; llvm.amdgcn.implicit.buffer.ptr and stores it to %out. The RUN line targets
; amdgcn-amd-amdhsa under "not llc", and the preceding ERROR check expects the
; "non-hsa intrinsic with hsa target" diagnostic to name this function.
+define amdgpu_kernel void @test_kernel(i32 addrspace(1)* %out) #1 {
+ %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
+ %header_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
+ %value = load i32, i32 addrspace(2)* %header_ptr
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; ERROR: in function test_func{{.*}}: non-hsa intrinsic with hsa target
; Same body as @test_kernel but declared without the amdgpu_kernel calling
; convention; the preceding ERROR check expects the same
; "non-hsa intrinsic with hsa target" diagnostic for this function.
+define void @test_func(i32 addrspace(1)* %out) #1 {
+ %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
+ %header_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
+ %value = load i32, i32 addrspace(2)* %header_ptr
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() #0
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind }
--- /dev/null
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: Requires stack object to not assert
+; GCN-LABEL: {{^}}test_ps:
+; GCN: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GCN: buffer_store_dword v0, off, s[4:7], s2 offset:4
+; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: ; return
; amdgpu_ps case on amdgcn-mesa-mesa3d. The volatile store to an alloca gives
; the function a stack object (see the FIXME above), then a volatile i32 is
; loaded through the implicit buffer pointer and returned. The GCN checks
; expect the scratch descriptor base to be loaded indirectly
; (s_load_dwordx2 s[4:5], s[0:1]) and the intrinsic result to be read from
; s[0:1].
+define amdgpu_ps i32 @test_ps() #1 {
+ %alloca = alloca i32
+ store volatile i32 0, i32* %alloca
+ %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
+ %buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
+ %value = load volatile i32, i32 addrspace(2)* %buffer_ptr
+ ret i32 %value
+}
+
+; GCN-LABEL: {{^}}test_cs:
+; GCN: s_mov_b64 s[4:5], s[0:1]
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:4
+; GCN: s_load_dword s0, s[0:1], 0x0
; amdgpu_cs case on amdgcn-mesa-mesa3d, with the same body as @test_ps. The
; GCN checks expect the pointer in s[0:1] to be copied directly into the
; scratch descriptor (s_mov_b64 s[4:5], s[0:1]) rather than loaded through it,
; and the intrinsic result to be read from s[0:1].
+define amdgpu_cs i32 @test_cs() #1 {
+ %alloca = alloca i32
+ store volatile i32 0, i32* %alloca
+ %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
+ %buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
+ %value = load volatile i32, i32 addrspace(2)* %buffer_ptr
+ ret i32 %value
+}
+
+declare i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() #0
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind }