}
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
- bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+ bool HasFlat = ST.hasFlatAddressSpace();
+ bool HasApertureRegs = ST.hasApertureRegs();
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
bool Changed = false;
bool NeedQueuePtr = false;
+ bool HaveCall = false;
bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
for (BasicBlock &BB : F) {
Function *Callee = CS.getCalledFunction();
// TODO: Do something with indirect calls.
- if (!Callee)
+ if (!Callee) {
+ if (!CS.isInlineAsm())
+ HaveCall = true;
continue;
+ }
Intrinsic::ID IID = Callee->getIntrinsicID();
if (IID == Intrinsic::not_intrinsic) {
+ HaveCall = true;
copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
Changed = true;
} else {
Changed = true;
}
+ // TODO: We could refine this to captured pointers that could possibly be
+ // accessed by flat instructions. For now this is mostly a poor way of
+ // estimating whether there are calls before argument lowering.
+ if (HasFlat && !IsFunc && HaveCall) {
+ F.addFnAttr("amdgpu-flat-scratch");
+ Changed = true;
+ }
+
return Changed;
}
// this point it appears we need the setup. This part of the prolog should be
// emitted after frame indices are eliminated.
- if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
+ if (MFI->hasFlatScratchInit())
emitFlatScratchInit(ST, MF, MBB);
unsigned SPReg = MFI->getStackPtrOffsetReg();
assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
DebugLoc DL;
- int64_t StackSize = MF.getFrameInfo().getStackSize();
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ int64_t StackSize = FrameInfo.getStackSize();
if (StackSize == 0) {
BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
}
}
- if (ST.isAmdCodeObjectV2(MF)) {
+ bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
+ if (IsCOV2) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
KernargSegmentPtr = true;
- // We don't need to worry about accessing spills with flat instructions.
- // TODO: On VI where we must use flat for global, we should be able to omit
- // this if it is never used for generic access.
- if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS() &&
- isEntryFunction())
- FlatScratchInit = true;
+ if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls that may require it before argument lowering.
+ if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
+ FlatScratchInit = true;
+ }
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
ret void
}
+; HSA: declare void @external.func() #15
+declare void @external.func() #3
+
+; HSA: define internal void @defined.func() #15 {
+define internal void @defined.func() #3 {
+ ret void
+}
+
+; HSA: define void @func_call_external() #15 {
+define void @func_call_external() #3 {
+ call void @external.func()
+ ret void
+}
+
+; HSA: define void @func_call_defined() #15 {
+define void @func_call_defined() #3 {
+ call void @defined.func()
+ ret void
+}
+
+; HSA: define void @func_call_asm() #15 {
+define void @func_call_asm() #3 {
+ call void asm sideeffect "", ""() #3
+ ret void
+}
+
+; HSA: define amdgpu_kernel void @kern_call_external() #16 {
+define amdgpu_kernel void @kern_call_external() #3 {
+ call void @external.func()
+ ret void
+}
+
+; HSA: define amdgpu_kernel void @func_kern_defined() #16 {
+define amdgpu_kernel void @func_kern_defined() #3 {
+ call void @defined.func()
+ ret void
+}
+
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind "target-cpu"="fiji" }
attributes #2 = { nounwind "target-cpu"="gfx900" }
+attributes #3 = { nounwind }
; HSA: attributes #0 = { nounwind readnone speculatable }
; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" }
; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
+; HSA: attributes #15 = { nounwind }
+; HSA: attributes #16 = { nounwind "amdgpu-flat-scratch" }