From 0410b9ebccbfe41fec8cfd818fd7640763a4a2ed Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 21 Feb 2019 23:27:46 +0000 Subject: [PATCH] AMDGPU: Remove debugger related subtarget features As far as I know these aren't needed anymore. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354634 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPU.h | 4 - lib/Target/AMDGPU/AMDGPU.td | 18 ---- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 31 ------ .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 18 +--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2 - lib/Target/AMDGPU/AMDGPUSubtarget.h | 14 --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 - lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 2 - lib/Target/AMDGPU/CMakeLists.txt | 1 - lib/Target/AMDGPU/SIDebuggerInsertNops.cpp | 96 ------------------- lib/Target/AMDGPU/SIFrameLowering.cpp | 48 +--------- lib/Target/AMDGPU/SIFrameLowering.h | 3 - lib/Target/AMDGPU/SIISelLowering.cpp | 32 ------- lib/Target/AMDGPU/SIISelLowering.h | 2 - lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 32 +++---- lib/Target/AMDGPU/SIMachineFunctionInfo.h | 30 ------ lib/Target/AMDGPU/SIProgramInfo.h | 12 --- test/CodeGen/AMDGPU/debugger-emit-prologue.ll | 81 ---------------- test/CodeGen/AMDGPU/debugger-insert-nops.ll | 80 ---------------- .../AMDGPU/hsa-metadata-kernel-debug-props.ll | 67 ------------- test/CodeGen/AMDGPU/syncscopes.ll | 2 +- 21 files changed, 14 insertions(+), 563 deletions(-) delete mode 100644 lib/Target/AMDGPU/SIDebuggerInsertNops.cpp delete mode 100644 test/CodeGen/AMDGPU/debugger-emit-prologue.ll delete mode 100644 test/CodeGen/AMDGPU/debugger-insert-nops.ll delete mode 100644 test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 344c3f4fbf7..ca3e6e2028d 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -50,7 +50,6 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass(); FunctionPass *createSIOptimizeExecMaskingPreRAPass(); FunctionPass *createSIFixSGPRCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); -FunctionPass *createSIDebuggerInsertNopsPass(); FunctionPass *createSIInsertWaitcntsPass(); FunctionPass *createSIFixWWMLivenessPass(); FunctionPass *createSIFormMemoryClausesPass(); @@ -196,9 +195,6 @@ extern char &SIAnnotateControlFlowPassID; void initializeSIMemoryLegalizerPass(PassRegistry&); extern char &SIMemoryLegalizerID; -void initializeSIDebuggerInsertNopsPass(PassRegistry&); -extern char &SIDebuggerInsertNopsID; - void initializeSIModeRegisterPass(PassRegistry&); extern char &SIModeRegisterID; diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index bcea2550c98..588d9766001 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -578,24 +578,6 @@ def FeatureISAVersion9_0_9 : FeatureSet< FeatureXNACK, FeatureCodeObjectV3]>; -//===----------------------------------------------------------------------===// -// Debugger related subtarget features. -//===----------------------------------------------------------------------===// - -def FeatureDebuggerInsertNops : SubtargetFeature< - "amdgpu-debugger-insert-nops", - "DebuggerInsertNops", - "true", - "Insert one nop instruction for each high level source statement" ->; - -def FeatureDebuggerEmitPrologue : SubtargetFeature< - "amdgpu-debugger-emit-prologue", - "DebuggerEmitPrologue", - "true", - "Emit debugger prologue" ->; - //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 1b62b3f1d09..b9a84171f23 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -487,15 +487,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->emitRawComment( " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false); - if (MF.getSubtarget().debuggerEmitPrologue()) { - OutStreamer->emitRawComment( - " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" + - Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false); - OutStreamer->emitRawComment( - " DebuggerPrivateSegmentBufferSGPR: s" + - Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false); - } - OutStreamer->emitRawComment( " COMPUTE_PGM_RSRC2:USER_SGPR: " + Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false); @@ -828,8 +819,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, const GCNSubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); - const SIInstrInfo *TII = STM.getInstrInfo(); - const SIRegisterInfo *RI = &TII->getRegisterInfo(); // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be @@ -921,16 +910,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( &STM, ProgInfo.NumVGPRsForWavesPerEU); - // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and - // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" - // attribute was requested. - if (STM.debuggerEmitPrologue()) { - ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR = - RI->getHWRegIndex(MFI->getScratchWaveOffsetReg()); - ProgInfo.DebuggerPrivateSegmentBufferSGPR = - RI->getHWRegIndex(MFI->getScratchRSrcReg()); - } - // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode // register. ProgInfo.FloatMode = getFPMode(MF); @@ -1184,9 +1163,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, if (MFI->hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; - if (STM.debuggerSupported()) - Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED; - if (STM.isXNACKEnabled()) Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; @@ -1201,13 +1177,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, // 2^n. The minimum alignment is 2^4 = 16. Out.kernarg_segment_alignment = std::max((size_t)4, countTrailingZeros(MaxKernArgAlign)); - - if (STM.debuggerEmitPrologue()) { - Out.debug_wavefront_private_segment_offset_sgpr = - CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; - Out.debug_private_segment_buffer_sgpr = - CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR; - } } bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 15cee69e935..f93ccf65a45 100644 --- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -239,23 +239,7 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, Kernel::DebugProps::Metadata MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { - const GCNSubtarget &STM = MF.getSubtarget(); - HSAMD::Kernel::DebugProps::Metadata HSADebugProps; - - if (!STM.debuggerSupported()) - return HSADebugProps; - - HSADebugProps.mDebuggerABIVersion.push_back(1); - HSADebugProps.mDebuggerABIVersion.push_back(0); - - if (STM.debuggerEmitPrologue()) { - HSADebugProps.mPrivateSegmentBufferSGPR = - ProgramInfo.DebuggerPrivateSegmentBufferSGPR; - HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR = - ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; - } - - return HSADebugProps; + return HSAMD::Kernel::DebugProps::Metadata(); } void MetadataStreamerV2::emitVersion() { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index cdfbfb5154e..cf90426574c 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -174,8 +174,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasApertureRegs(false), EnableXNACK(false), TrapHandler(false), - DebuggerInsertNops(false), - DebuggerEmitPrologue(false), EnableHugePrivateBuffer(false), EnableLoadStoreOpt(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 789fd2af4b9..0002e8e51be 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -295,8 +295,6 @@ protected: bool HasApertureRegs; bool EnableXNACK; bool TrapHandler; - bool DebuggerInsertNops; - bool DebuggerEmitPrologue; // Used as options. bool EnableHugePrivateBuffer; @@ -791,18 +789,6 @@ public: return EnableSIScheduler; } - bool debuggerSupported() const { - return debuggerInsertNops() && debuggerEmitPrologue(); - } - - bool debuggerInsertNops() const { - return DebuggerInsertNops; - } - - bool debuggerEmitPrologue() const { - return DebuggerEmitPrologue; - } - bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index f9bd5002810..fdca43107ca 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -200,7 +200,6 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeSILowerControlFlowPass(*PR); initializeSIInsertSkipsPass(*PR); initializeSIMemoryLegalizerPass(*PR); - initializeSIDebuggerInsertNopsPass(*PR); initializeSIOptimizeExecMaskingPass(*PR); initializeSIFixWWMLivenessPass(*PR); initializeSIFormMemoryClausesPass(*PR); @@ -918,7 +917,6 @@ void GCNPassConfig::addPreEmitPass() { addPass(&PostRAHazardRecognizerID); addPass(&SIInsertSkipsPassID); - addPass(createSIDebuggerInsertNopsPass()); addPass(&BranchRelaxationPassID); } diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index e5840062cb8..e3749f58d52 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -77,8 +77,6 @@ class GCNTTIImpl final : public BasicTTIImplBase { AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureAutoWaitcntBeforeBarrier, - AMDGPU::FeatureDebuggerEmitPrologue, - AMDGPU::FeatureDebuggerInsertNops, // Property of the kernel/environment which can't actually differ. AMDGPU::FeatureSGPRInitBug, diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 442946c4543..109d95cec2c 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -92,7 +92,6 @@ add_llvm_target(AMDGPUCodeGen R600RegisterInfo.cpp SIAddIMGInit.cpp SIAnnotateControlFlow.cpp - SIDebuggerInsertNops.cpp SIFixSGPRCopies.cpp SIFixupVectorISel.cpp SIFixVGPRCopies.cpp diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp deleted file mode 100644 index e30da738831..00000000000 --- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ /dev/null @@ -1,96 +0,0 @@ -//===--- SIDebuggerInsertNops.cpp - Inserts nops for debugger usage -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Inserts one nop instruction for each high level source statement for -/// debugger usage. -/// -/// Tools, such as a debugger, need to pause execution based on user input (i.e. -/// breakpoint). In order to do this, one nop instruction is inserted before the -/// first isa instruction of each high level source statement. Further, the -/// debugger may replace nop instructions with trap instructions based on user -/// input. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPUSubtarget.h" -#include "SIInstrInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -using namespace llvm; - -#define DEBUG_TYPE "si-debugger-insert-nops" -#define PASS_NAME "SI Debugger Insert Nops" - -namespace { - -class SIDebuggerInsertNops : public MachineFunctionPass { -public: - static char ID; - - SIDebuggerInsertNops() : MachineFunctionPass(ID) { } - StringRef getPassName() const override { return PASS_NAME; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; -}; - -} // anonymous namespace - -INITIALIZE_PASS(SIDebuggerInsertNops, DEBUG_TYPE, PASS_NAME, false, false) - -char SIDebuggerInsertNops::ID = 0; -char &llvm::SIDebuggerInsertNopsID = SIDebuggerInsertNops::ID; - -FunctionPass *llvm::createSIDebuggerInsertNopsPass() { - return new SIDebuggerInsertNops(); -} - -bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) { - // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not - // specified. - const GCNSubtarget &ST = MF.getSubtarget(); - if (!ST.debuggerInsertNops()) - return false; - - // Skip machine functions without debug info. - if (!MF.getMMI().hasDebugInfo()) - return false; - - // Target instruction info. - const SIInstrInfo *TII = ST.getInstrInfo(); - - // Set containing line numbers that have nop inserted. - DenseSet NopInserted; - - for (auto &MBB : MF) { - for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) { - // Skip debug instructions and instructions without location. - if (MI->isDebugInstr() || !MI->getDebugLoc()) - continue; - - // Insert nop instruction if line number does not have nop inserted. - auto DL = MI->getDebugLoc(); - if (NopInserted.find(DL.getLine()) == NopInserted.end()) { - BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP)) - .addImm(0); - NopInserted.insert(DL.getLine()); - } - } - } - - return true; -} diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 53688470400..2596a395219 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -217,12 +217,6 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was - // specified. - const GCNSubtarget &ST = MF.getSubtarget(); - if (ST.debuggerEmitPrologue()) - emitDebuggerPrologue(MF, MBB); - assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); SIMachineFunctionInfo *MFI = MF.getInfo(); @@ -233,6 +227,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, // FIXME: We should be cleaning up these unused SGPR spill frame indices // somewhere. + const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = &TII->getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -778,47 +773,6 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( return MBB.erase(I); } -void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - const GCNSubtarget &ST = MF.getSubtarget(); - const SIInstrInfo *TII = ST.getInstrInfo(); - const SIRegisterInfo *TRI = &TII->getRegisterInfo(); - const SIMachineFunctionInfo *MFI = MF.getInfo(); - - MachineBasicBlock::iterator I = MBB.begin(); - DebugLoc DL; - - // For each dimension: - for (unsigned i = 0; i < 3; ++i) { - // Get work group ID SGPR, and make it live-in again. - unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i); - MF.getRegInfo().addLiveIn(WorkGroupIDSGPR); - MBB.addLiveIn(WorkGroupIDSGPR); - - // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in - // order to spill it to scratch. - unsigned WorkGroupIDVGPR = - MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR) - .addReg(WorkGroupIDSGPR); - - // Spill work group ID. - int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i); - TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false, - WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI); - - // Get work item ID VGPR, and make it live-in again. - unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i); - MF.getRegInfo().addLiveIn(WorkItemIDVGPR); - MBB.addLiveIn(WorkItemIDVGPR); - - // Spill work item ID. - int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i); - TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false, - WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI); - } -} - bool SIFrameLowering::hasFP(const MachineFunction &MF) const { // All stack operations are relative to the frame offset SGPR. // TODO: Still want to eliminate sometimes. diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index a8795e126f6..c5b707cba06 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -65,9 +65,6 @@ private: SIMachineFunctionInfo *MFI, MachineFunction &MF) const; - /// Emits debugger prologue. - void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI, diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 96eb619497b..83d7711e801 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1862,7 +1862,6 @@ SDValue SITargetLowering::LowerFormalArguments( const Function &Fn = MF.getFunction(); FunctionType *FType = MF.getFunction().getFunctionType(); SIMachineFunctionInfo *Info = MF.getInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) { DiagnosticInfoUnsupported NoGraphicsHSA( @@ -1871,11 +1870,6 @@ SDValue SITargetLowering::LowerFormalArguments( return DAG.getEntryNode(); } - // Create stack objects that are used for emitting debugger prologue if - // "amdgpu-debugger-emit-prologue" attribute was specified. - if (ST.debuggerEmitPrologue()) - createDebuggerPrologueStackObjects(MF); - SmallVector Splits; SmallVector ArgLocs; BitVector Skipped(Ins.size()); @@ -3962,32 +3956,6 @@ unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const { return 0; } -void SITargetLowering::createDebuggerPrologueStackObjects( - MachineFunction &MF) const { - // Create stack objects that are used for emitting debugger prologue. - // - // Debugger prologue writes work group IDs and work item IDs to scratch memory - // at fixed location in the following format: - // offset 0: work group ID x - // offset 4: work group ID y - // offset 8: work group ID z - // offset 16: work item ID x - // offset 20: work item ID y - // offset 24: work item ID z - SIMachineFunctionInfo *Info = MF.getInfo(); - int ObjectIdx = 0; - - // For each dimension: - for (unsigned i = 0; i < 3; ++i) { - // Create fixed stack object for work group ID. - ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4, true); - Info->setDebuggerWorkGroupIDStackObjectIndex(i, ObjectIdx); - // Create fixed stack object for work item ID. - ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4 + 16, true); - Info->setDebuggerWorkItemIDStackObjectIndex(i, ObjectIdx); - } -} - bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const { const Triple &TT = getTargetMachine().getTargetTriple(); return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index bc5e1092a36..ee4c4bf826c 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -174,8 +174,6 @@ private: unsigned isCFIntrinsic(const SDNode *Intr) const; - void createDebuggerPrologueStackObjects(MachineFunction &MF) const; - /// \returns True if fixup needs to be emitted for given global value \p GV, /// false otherwise. bool shouldEmitFixup(const GlobalValue *GV) const; diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 22dc4c139ba..78e88abda68 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -87,33 +87,23 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) } } - if (ST.debuggerEmitPrologue()) { - // Enable everything. + if (F.hasFnAttribute("amdgpu-work-group-id-x")) WorkGroupIDX = true; - WorkGroupIDY = true; - WorkGroupIDZ = true; - WorkItemIDX = true; - WorkItemIDY = true; - WorkItemIDZ = true; - } else { - if (F.hasFnAttribute("amdgpu-work-group-id-x")) - WorkGroupIDX = true; - if (F.hasFnAttribute("amdgpu-work-group-id-y")) - WorkGroupIDY = true; + if (F.hasFnAttribute("amdgpu-work-group-id-y")) + WorkGroupIDY = true; - if (F.hasFnAttribute("amdgpu-work-group-id-z")) - WorkGroupIDZ = true; + if (F.hasFnAttribute("amdgpu-work-group-id-z")) + WorkGroupIDZ = true; - if (F.hasFnAttribute("amdgpu-work-item-id-x")) - WorkItemIDX = true; + if (F.hasFnAttribute("amdgpu-work-item-id-x")) + WorkItemIDX = true; - if (F.hasFnAttribute("amdgpu-work-item-id-y")) - WorkItemIDY = true; + if (F.hasFnAttribute("amdgpu-work-item-id-y")) + WorkItemIDY = true; - if (F.hasFnAttribute("amdgpu-work-item-id-z")) - WorkItemIDZ = true; - } + if (F.hasFnAttribute("amdgpu-work-item-id-z")) + WorkItemIDZ = true; const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); bool HasStackObjects = FrameInfo.hasStackObjects(); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 79be2760e2b..de84fbbd776 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -123,12 +123,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // unit. Minimum - first, maximum - second. std::pair WavesPerEU = {0, 0}; - // Stack object indices for work group IDs. - std::array DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}}; - - // Stack object indices for work item IDs. - std::array DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; - DenseMap> BufferPSVs; DenseMap::max() if the register is not - // used or not known. - uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR = - std::numeric_limits::max(); - - // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire - // kernel execution, or std::numeric_limits::max() if the register - // is not used or not known. - uint16_t DebuggerPrivateSegmentBufferSGPR = - std::numeric_limits::max(); - // Whether there is recursion, dynamic allocas, indirect calls or some other // reason there may be statically unknown stack usage. bool DynamicCallStack = false; diff --git a/test/CodeGen/AMDGPU/debugger-emit-prologue.ll b/test/CodeGen/AMDGPU/debugger-emit-prologue.ll deleted file mode 100644 index b416537b9f8..00000000000 --- a/test/CodeGen/AMDGPU/debugger-emit-prologue.ll +++ /dev/null @@ -1,81 +0,0 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-code-object-v3 -verify-machineinstrs < %s | FileCheck %s --check-prefix=NOATTR -target datalayout = "A5" - -; CHECK: debug_wavefront_private_segment_offset_sgpr = [[SOFF:[0-9]+]] -; CHECK: debug_private_segment_buffer_sgpr = [[SREG:[0-9]+]] - -; CHECK: v_mov_b32_e32 [[WGIDX:v[0-9]+]], s{{[0-9]+}} -; CHECK: buffer_store_dword [[WGIDX]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] -; CHECK: buffer_store_dword v0, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:16 - -; CHECK: v_mov_b32_e32 [[WGIDY:v[0-9]+]], s{{[0-9]+}} -; CHECK: buffer_store_dword [[WGIDY]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:4 -; CHECK: buffer_store_dword v1, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:20 - -; CHECK: v_mov_b32_e32 [[WGIDZ:v[0-9]+]], s{{[0-9]+}} -; CHECK: buffer_store_dword [[WGIDZ]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:8 -; CHECK: buffer_store_dword v2, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:24 - -; CHECK: DebuggerWavefrontPrivateSegmentOffsetSGPR: s[[SOFF]] -; CHECK: DebuggerPrivateSegmentBufferSGPR: s[[SREG]] - -; NOATTR-NOT: DebuggerWavefrontPrivateSegmentOffsetSGPR -; NOATTR-NOT: DebuggerPrivateSegmentBufferSGPR - -; Function Attrs: nounwind -define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !12 { -entry: - %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !17, metadata !18), !dbg !19 - %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !20 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20 - store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21 - %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !22 - %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22 - store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23 - %2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !24 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24 - store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25 - ret void, !dbg !26 -} - -; Function Attrs: nounwind readnone -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone } - -!llvm.dbg.cu = !{!0} -!opencl.kernels = !{!3} -!llvm.module.flags = !{!9, !10} -!llvm.ident = !{!11} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 269772)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "test01.cl", directory: "/home/kzhuravl/Lightning/testing") -!2 = !{} -!3 = !{void (i32 addrspace(1)*)* @test, !4, !5, !6, !7, !8} -!4 = !{!"kernel_arg_addr_space", i32 1} -!5 = !{!"kernel_arg_access_qual", !"none"} -!6 = !{!"kernel_arg_type", !"int addrspace(5)*"} -!7 = !{!"kernel_arg_base_type", !"int addrspace(5)*"} -!8 = !{!"kernel_arg_type_qual", !""} -!9 = !{i32 2, !"Dwarf Version", i32 2} -!10 = !{i32 2, !"Debug Info Version", i32 3} -!11 = !{!"clang version 3.9.0 (trunk 269772)"} -!12 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) -!13 = !DISubroutineType(types: !14) -!14 = !{null, !15} -!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32) -!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15) -!18 = !DIExpression() -!19 = !DILocation(line: 1, column: 30, scope: !12) -!20 = !DILocation(line: 2, column: 3, scope: !12) -!21 = !DILocation(line: 2, column: 8, scope: !12) -!22 = !DILocation(line: 3, column: 3, scope: !12) -!23 = !DILocation(line: 3, column: 8, scope: !12) -!24 = !DILocation(line: 4, column: 3, scope: !12) -!25 = !DILocation(line: 4, column: 8, scope: !12) -!26 = !DILocation(line: 5, column: 1, scope: !12) diff --git a/test/CodeGen/AMDGPU/debugger-insert-nops.ll b/test/CodeGen/AMDGPU/debugger-insert-nops.ll deleted file mode 100644 index f0947e49873..00000000000 --- a/test/CodeGen/AMDGPU/debugger-insert-nops.ll +++ /dev/null @@ -1,80 +0,0 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP -target datalayout = "A5" - -; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each. - -; Check that each line appears at least once -; CHECK-DAG: test01.cl:2:3 -; CHECK-DAG: test01.cl:3:3 -; CHECK-DAG: test01.cl:4:3 - - -; Check that each of each of the lines consists of the line output, followed by "s_nop 0" -; CHECKNOP: test01.cl:{{[234]}}:3 -; CHECKNOP-NEXT: s_nop 0 -; CHECKNOP: test01.cl:{{[234]}}:3 -; CHECKNOP-NEXT: s_nop 0 -; CHECKNOP: test01.cl:{{[234]}}:3 -; CHECKNOP-NEXT: s_nop 0 - -; CHECK: test01.cl:5:{{[0-9]+}} -; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: s_endpgm - -; Function Attrs: nounwind -define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !12 { -entry: - %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !17, metadata !18), !dbg !19 - %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !20 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20 - store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !20 - %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !22 - %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22 - store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23 - %2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !24 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24 - store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25 - ret void, !dbg !26 -} - -; Function Attrs: nounwind readnone -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } - -!llvm.dbg.cu = !{!0} -!opencl.kernels = !{!3} -!llvm.module.flags = !{!9, !10} -!llvm.ident = !{!11} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "test01.cl", directory: "/home/kzhuravl/Lightning/testing") -!2 = !{} -!3 = !{void (i32 addrspace(1)*)* @test, !4, !5, !6, !7, !8} -!4 = !{!"kernel_arg_addr_space", i32 1} -!5 = !{!"kernel_arg_access_qual", !"none"} -!6 = !{!"kernel_arg_type", !"int addrspace(5)*"} -!7 = !{!"kernel_arg_base_type", !"int addrspace(5)*"} -!8 = !{!"kernel_arg_type_qual", !""} -!9 = !{i32 2, !"Dwarf Version", i32 2} -!10 = !{i32 2, !"Debug Info Version", i32 3} -!11 = !{!"clang version 3.9.0 (trunk 268929)"} -!12 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) -!13 = !DISubroutineType(types: !14) -!14 = !{null, !15} -!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32) -!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15) -!18 = !DIExpression() -!19 = !DILocation(line: 1, column: 30, scope: !12) -!20 = !DILocation(line: 2, column: 3, scope: !12) -!21 = !DILocation(line: 2, column: 8, scope: !12) -!22 = !DILocation(line: 3, column: 3, scope: !12) -!23 = !DILocation(line: 3, column: 8, scope: !12) -!24 = !DILocation(line: 4, column: 3, scope: !12) -!25 = !DILocation(line: 4, column: 8, scope: !12) -!26 = !DILocation(line: 5, column: 1, scope: !12) diff --git a/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll b/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll deleted file mode 100644 index 7eacdc1cdab..00000000000 --- a/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll +++ /dev/null @@ -1,67 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s -target datalayout = "A5" - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] - -; CHECK: Kernels: -; CHECK: - Name: test -; CHECK: SymbolName: 'test@kd' -; CHECK: DebugProps: -; CHECK: DebuggerABIVersion: [ 1, 0 ] -; CHECK: PrivateSegmentBufferSGPR: 0 -; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11 -define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 { -entry: - %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !16, metadata !17), !dbg !18 - %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !19 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 0, !dbg !19 - store i32 777, i32 addrspace(1)* %arrayidx, align 4, !dbg !20 - %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !21 - %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i64 1, !dbg !21 - store i32 888, i32 addrspace(1)* %arrayidx1, align 4, !dbg !22 - %2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !23 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i64 2, !dbg !23 - store i32 999, i32 addrspace(1)* %arrayidx2, align 4, !dbg !24 - ret void, !dbg !25 -} - -attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="gfx800" "target-features"="+16-bit-insts,-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops,+amdgpu-debugger-reserve-regs,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.dbg.cu = !{!0} -!opencl.ocl.version = !{!3} -!llvm.module.flags = !{!4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "code-object-metadata-kernel-debug-props.cl", directory: "/some/random/directory") -!2 = !{} -!3 = !{i32 1, i32 0} -!4 = !{i32 2, !"Dwarf Version", i32 2} -!5 = !{i32 2, !"Debug Info Version", i32 3} -!6 = !{!"clang version 5.0.0"} -!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{null, !10} -!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) -!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!12 = !{i32 1} -!13 = !{!"none"} -!14 = !{!"int addrspace(5)*"} -!15 = !{!""} -!16 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) -!17 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef) -!18 = !DILocation(line: 1, column: 30, scope: !7) -!19 = !DILocation(line: 2, column: 3, scope: !7) -!20 = !DILocation(line: 2, column: 8, scope: !7) -!21 = !DILocation(line: 3, column: 3, scope: !7) -!22 = !DILocation(line: 3, column: 8, scope: !7) -!23 = !DILocation(line: 4, column: 3, scope: !7) -!24 = !DILocation(line: 4, column: 8, scope: !7) -!25 = !DILocation(line: 5, column: 1, scope: !7) diff --git a/test/CodeGen/AMDGPU/syncscopes.ll b/test/CodeGen/AMDGPU/syncscopes.ll index 83cf6d45e24..413b7654e05 100644 --- a/test/CodeGen/AMDGPU/syncscopes.ll +++ b/test/CodeGen/AMDGPU/syncscopes.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-before=si-debugger-insert-nops < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-after=si-insert-skips < %s | FileCheck --check-prefix=GCN %s ; GCN-LABEL: name: syncscopes ; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out) -- 2.50.1