From 0410b9ebccbfe41fec8cfd818fd7640763a4a2ed Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Thu, 21 Feb 2019 23:27:46 +0000
Subject: [PATCH] AMDGPU: Remove debugger related subtarget features

As far as I know these aren't needed anymore.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354634 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/AMDGPU.h                    |  4 -
 lib/Target/AMDGPU/AMDGPU.td                   | 18 ----
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp        | 31 ------
 .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp      | 18 +---
 lib/Target/AMDGPU/AMDGPUSubtarget.cpp         |  2 -
 lib/Target/AMDGPU/AMDGPUSubtarget.h           | 14 ---
 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp     |  2 -
 lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h |  2 -
 lib/Target/AMDGPU/CMakeLists.txt              |  1 -
 lib/Target/AMDGPU/SIDebuggerInsertNops.cpp    | 96 -------------------
 lib/Target/AMDGPU/SIFrameLowering.cpp         | 48 +---------
 lib/Target/AMDGPU/SIFrameLowering.h           |  3 -
 lib/Target/AMDGPU/SIISelLowering.cpp          | 32 -------
 lib/Target/AMDGPU/SIISelLowering.h            |  2 -
 lib/Target/AMDGPU/SIMachineFunctionInfo.cpp   | 32 +++----
 lib/Target/AMDGPU/SIMachineFunctionInfo.h     | 30 ------
 lib/Target/AMDGPU/SIProgramInfo.h             | 12 ---
 test/CodeGen/AMDGPU/debugger-emit-prologue.ll | 81 ----------------
 test/CodeGen/AMDGPU/debugger-insert-nops.ll   | 80 ----------------
 .../AMDGPU/hsa-metadata-kernel-debug-props.ll | 67 -------------
 test/CodeGen/AMDGPU/syncscopes.ll             |  2 +-
 21 files changed, 14 insertions(+), 563 deletions(-)
 delete mode 100644 lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
 delete mode 100644 test/CodeGen/AMDGPU/debugger-emit-prologue.ll
 delete mode 100644 test/CodeGen/AMDGPU/debugger-insert-nops.ll
 delete mode 100644 test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll

diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 344c3f4fbf7..ca3e6e2028d 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -50,7 +50,6 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
 FunctionPass *createSIFixSGPRCopiesPass();
 FunctionPass *createSIMemoryLegalizerPass();
-FunctionPass *createSIDebuggerInsertNopsPass();
 FunctionPass *createSIInsertWaitcntsPass();
 FunctionPass *createSIFixWWMLivenessPass();
 FunctionPass *createSIFormMemoryClausesPass();
@@ -196,9 +195,6 @@ extern char &SIAnnotateControlFlowPassID;
 void initializeSIMemoryLegalizerPass(PassRegistry&);
 extern char &SIMemoryLegalizerID;
 
-void initializeSIDebuggerInsertNopsPass(PassRegistry&);
-extern char &SIDebuggerInsertNopsID;
-
 void initializeSIModeRegisterPass(PassRegistry&);
 extern char &SIModeRegisterID;
 
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index bcea2550c98..588d9766001 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -578,24 +578,6 @@ def FeatureISAVersion9_0_9 : FeatureSet<
    FeatureXNACK,
    FeatureCodeObjectV3]>;
 
-//===----------------------------------------------------------------------===//
-// Debugger related subtarget features.
-//===----------------------------------------------------------------------===//
-
-def FeatureDebuggerInsertNops : SubtargetFeature<
-  "amdgpu-debugger-insert-nops",
-  "DebuggerInsertNops",
-  "true",
-  "Insert one nop instruction for each high level source statement"
->;
-
-def FeatureDebuggerEmitPrologue : SubtargetFeature<
-  "amdgpu-debugger-emit-prologue",
-  "DebuggerEmitPrologue",
-  "true",
-  "Emit debugger prologue"
->;
-
 //===----------------------------------------------------------------------===//
 
 def AMDGPUInstrInfo : InstrInfo {
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 1b62b3f1d09..b9a84171f23 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -487,15 +487,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     OutStreamer->emitRawComment(
       " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
 
-    if (MF.getSubtarget<GCNSubtarget>().debuggerEmitPrologue()) {
-      OutStreamer->emitRawComment(
-        " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
-        Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
-      OutStreamer->emitRawComment(
-        " DebuggerPrivateSegmentBufferSGPR: s" +
-        Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false);
-    }
-
     OutStreamer->emitRawComment(
       " COMPUTE_PGM_RSRC2:USER_SGPR: " +
       Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
@@ -828,8 +819,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
 
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  const SIInstrInfo *TII = STM.getInstrInfo();
-  const SIRegisterInfo *RI = &TII->getRegisterInfo();
 
   // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
   // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
@@ -921,16 +910,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
       &STM, ProgInfo.NumVGPRsForWavesPerEU);
 
-  // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
-  // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
-  // attribute was requested.
-  if (STM.debuggerEmitPrologue()) {
-    ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
-      RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
-    ProgInfo.DebuggerPrivateSegmentBufferSGPR =
-      RI->getHWRegIndex(MFI->getScratchRSrcReg());
-  }
-
   // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
   // register.
   ProgInfo.FloatMode = getFPMode(MF);
@@ -1184,9 +1163,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
   if (MFI->hasDispatchPtr())
     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
-  if (STM.debuggerSupported())
-    Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
-
   if (STM.isXNACKEnabled())
     Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
 
@@ -1201,13 +1177,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
   // 2^n.  The minimum alignment is 2^4 = 16.
   Out.kernarg_segment_alignment = std::max((size_t)4,
       countTrailingZeros(MaxKernArgAlign));
-
-  if (STM.debuggerEmitPrologue()) {
-    Out.debug_wavefront_private_segment_offset_sgpr =
-      CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
-    Out.debug_private_segment_buffer_sgpr =
-      CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR;
-  }
 }
 
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 15cee69e935..f93ccf65a45 100644
--- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -239,23 +239,7 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
 Kernel::DebugProps::Metadata
 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
                                      const SIProgramInfo &ProgramInfo) const {
-  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
-  HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
-
-  if (!STM.debuggerSupported())
-    return HSADebugProps;
-
-  HSADebugProps.mDebuggerABIVersion.push_back(1);
-  HSADebugProps.mDebuggerABIVersion.push_back(0);
-
-  if (STM.debuggerEmitPrologue()) {
-    HSADebugProps.mPrivateSegmentBufferSGPR =
-        ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
-    HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
-        ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
-  }
-
-  return HSADebugProps;
+  return HSAMD::Kernel::DebugProps::Metadata();
 }
 
 void MetadataStreamerV2::emitVersion() {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index cdfbfb5154e..cf90426574c 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -174,8 +174,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     HasApertureRegs(false),
     EnableXNACK(false),
     TrapHandler(false),
-    DebuggerInsertNops(false),
-    DebuggerEmitPrologue(false),
 
     EnableHugePrivateBuffer(false),
     EnableLoadStoreOpt(false),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 789fd2af4b9..0002e8e51be 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -295,8 +295,6 @@ protected:
   bool HasApertureRegs;
   bool EnableXNACK;
   bool TrapHandler;
-  bool DebuggerInsertNops;
-  bool DebuggerEmitPrologue;
 
   // Used as options.
   bool EnableHugePrivateBuffer;
@@ -791,18 +789,6 @@ public:
     return EnableSIScheduler;
   }
 
-  bool debuggerSupported() const {
-    return debuggerInsertNops() && debuggerEmitPrologue();
-  }
-
-  bool debuggerInsertNops() const {
-    return DebuggerInsertNops;
-  }
-
-  bool debuggerEmitPrologue() const {
-    return DebuggerEmitPrologue;
-  }
-
   bool loadStoreOptEnabled() const {
     return EnableLoadStoreOpt;
   }
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f9bd5002810..fdca43107ca 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -200,7 +200,6 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeSILowerControlFlowPass(*PR);
   initializeSIInsertSkipsPass(*PR);
   initializeSIMemoryLegalizerPass(*PR);
-  initializeSIDebuggerInsertNopsPass(*PR);
   initializeSIOptimizeExecMaskingPass(*PR);
   initializeSIFixWWMLivenessPass(*PR);
   initializeSIFormMemoryClausesPass(*PR);
@@ -918,7 +917,6 @@ void GCNPassConfig::addPreEmitPass() {
   addPass(&PostRAHazardRecognizerID);
 
   addPass(&SIInsertSkipsPassID);
-  addPass(createSIDebuggerInsertNopsPass());
   addPass(&BranchRelaxationPassID);
 }
 
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index e5840062cb8..e3749f58d52 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -77,8 +77,6 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
     AMDGPU::FeatureUnalignedScratchAccess,
 
     AMDGPU::FeatureAutoWaitcntBeforeBarrier,
-    AMDGPU::FeatureDebuggerEmitPrologue,
-    AMDGPU::FeatureDebuggerInsertNops,
 
     // Property of the kernel/environment which can't actually differ.
     AMDGPU::FeatureSGPRInitBug,
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index 442946c4543..109d95cec2c 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -92,7 +92,6 @@ add_llvm_target(AMDGPUCodeGen
   R600RegisterInfo.cpp
   SIAddIMGInit.cpp
   SIAnnotateControlFlow.cpp
-  SIDebuggerInsertNops.cpp
   SIFixSGPRCopies.cpp
   SIFixupVectorISel.cpp
   SIFixVGPRCopies.cpp
diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
deleted file mode 100644
index e30da738831..00000000000
--- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-//===--- SIDebuggerInsertNops.cpp - Inserts nops for debugger usage -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Inserts one nop instruction for each high level source statement for
-/// debugger usage.
-///
-/// Tools, such as a debugger, need to pause execution based on user input (i.e.
-/// breakpoint). In order to do this, one nop instruction is inserted before the
-/// first isa instruction of each high level source statement. Further, the
-/// debugger may replace nop instructions with trap instructions based on user
-/// input.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "si-debugger-insert-nops"
-#define PASS_NAME "SI Debugger Insert Nops"
-
-namespace {
-
-class SIDebuggerInsertNops : public MachineFunctionPass {
-public:
-  static char ID;
-
-  SIDebuggerInsertNops() : MachineFunctionPass(ID) { }
-  StringRef getPassName() const override { return PASS_NAME; }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // anonymous namespace
-
-INITIALIZE_PASS(SIDebuggerInsertNops, DEBUG_TYPE, PASS_NAME, false, false)
-
-char SIDebuggerInsertNops::ID = 0;
-char &llvm::SIDebuggerInsertNopsID = SIDebuggerInsertNops::ID;
-
-FunctionPass *llvm::createSIDebuggerInsertNopsPass() {
-  return new SIDebuggerInsertNops();
-}
-
-bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
-  // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
-  // specified.
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  if (!ST.debuggerInsertNops())
-    return false;
-
-  // Skip machine functions without debug info.
-  if (!MF.getMMI().hasDebugInfo())
-    return false;
-
-  // Target instruction info.
-  const SIInstrInfo *TII = ST.getInstrInfo();
-
-  // Set containing line numbers that have nop inserted.
-  DenseSet<unsigned> NopInserted;
-
-  for (auto &MBB : MF) {
-    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
-      // Skip debug instructions and instructions without location.
-      if (MI->isDebugInstr() || !MI->getDebugLoc())
-        continue;
-
-      // Insert nop instruction if line number does not have nop inserted.
-      auto DL = MI->getDebugLoc();
-      if (NopInserted.find(DL.getLine()) == NopInserted.end()) {
-        BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
-          .addImm(0);
-        NopInserted.insert(DL.getLine());
-      }
-    }
-  }
-
-  return true;
-}
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 53688470400..2596a395219 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -217,12 +217,6 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
 
 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                 MachineBasicBlock &MBB) const {
-  // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
-  // specified.
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  if (ST.debuggerEmitPrologue())
-    emitDebuggerPrologue(MF, MBB);
-
   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
 
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -233,6 +227,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   // FIXME: We should be cleaning up these unused SGPR spill frame indices
   // somewhere.
 
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -778,47 +773,6 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
   return MBB.erase(I);
 }
 
-void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
-                                           MachineBasicBlock &MBB) const {
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  const SIInstrInfo *TII = ST.getInstrInfo();
-  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
-  MachineBasicBlock::iterator I = MBB.begin();
-  DebugLoc DL;
-
-  // For each dimension:
-  for (unsigned i = 0; i < 3; ++i) {
-    // Get work group ID SGPR, and make it live-in again.
-    unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i);
-    MF.getRegInfo().addLiveIn(WorkGroupIDSGPR);
-    MBB.addLiveIn(WorkGroupIDSGPR);
-
-    // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in
-    // order to spill it to scratch.
-    unsigned WorkGroupIDVGPR =
-      MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR)
-      .addReg(WorkGroupIDSGPR);
-
-    // Spill work group ID.
-    int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i);
-    TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false,
-      WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
-
-    // Get work item ID VGPR, and make it live-in again.
-    unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i);
-    MF.getRegInfo().addLiveIn(WorkItemIDVGPR);
-    MBB.addLiveIn(WorkItemIDVGPR);
-
-    // Spill work item ID.
-    int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i);
-    TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false,
-      WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
-  }
-}
-
 bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
   // All stack operations are relative to the frame offset SGPR.
   // TODO: Still want to eliminate sometimes.
diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h
index a8795e126f6..c5b707cba06 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/lib/Target/AMDGPU/SIFrameLowering.h
@@ -65,9 +65,6 @@ private:
     SIMachineFunctionInfo *MFI,
     MachineFunction &MF) const;
 
-  /// Emits debugger prologue.
-  void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
   void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF,
       MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 96eb619497b..83d7711e801 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1862,7 +1862,6 @@ SDValue SITargetLowering::LowerFormalArguments(
   const Function &Fn = MF.getFunction();
   FunctionType *FType = MF.getFunction().getFunctionType();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 
   if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
     DiagnosticInfoUnsupported NoGraphicsHSA(
@@ -1871,11 +1870,6 @@ SDValue SITargetLowering::LowerFormalArguments(
     return DAG.getEntryNode();
   }
 
-  // Create stack objects that are used for emitting debugger prologue if
-  // "amdgpu-debugger-emit-prologue" attribute was specified.
-  if (ST.debuggerEmitPrologue())
-    createDebuggerPrologueStackObjects(MF);
-
   SmallVector<ISD::InputArg, 16> Splits;
   SmallVector<CCValAssign, 16> ArgLocs;
   BitVector Skipped(Ins.size());
@@ -3962,32 +3956,6 @@ unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
   return 0;
 }
 
-void SITargetLowering::createDebuggerPrologueStackObjects(
-    MachineFunction &MF) const {
-  // Create stack objects that are used for emitting debugger prologue.
-  //
-  // Debugger prologue writes work group IDs and work item IDs to scratch memory
-  // at fixed location in the following format:
-  //   offset 0:  work group ID x
-  //   offset 4:  work group ID y
-  //   offset 8:  work group ID z
-  //   offset 16: work item ID x
-  //   offset 20: work item ID y
-  //   offset 24: work item ID z
-  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  int ObjectIdx = 0;
-
-  // For each dimension:
-  for (unsigned i = 0; i < 3; ++i) {
-    // Create fixed stack object for work group ID.
-    ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4, true);
-    Info->setDebuggerWorkGroupIDStackObjectIndex(i, ObjectIdx);
-    // Create fixed stack object for work item ID.
-    ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4 + 16, true);
-    Info->setDebuggerWorkItemIDStackObjectIndex(i, ObjectIdx);
-  }
-}
-
 bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
   const Triple &TT = getTargetMachine().getTargetTriple();
   return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index bc5e1092a36..ee4c4bf826c 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -174,8 +174,6 @@ private:
 
   unsigned isCFIntrinsic(const SDNode *Intr) const;
 
-  void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
-
   /// \returns True if fixup needs to be emitted for given global value \p GV,
   /// false otherwise.
   bool shouldEmitFixup(const GlobalValue *GV) const;
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 22dc4c139ba..78e88abda68 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -87,33 +87,23 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     }
   }
 
-  if (ST.debuggerEmitPrologue()) {
-    // Enable everything.
+  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
     WorkGroupIDX = true;
-    WorkGroupIDY = true;
-    WorkGroupIDZ = true;
-    WorkItemIDX = true;
-    WorkItemIDY = true;
-    WorkItemIDZ = true;
-  } else {
-    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
-      WorkGroupIDX = true;
 
-    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
-      WorkGroupIDY = true;
+  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
+    WorkGroupIDY = true;
 
-    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
-      WorkGroupIDZ = true;
+  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
+    WorkGroupIDZ = true;
 
-    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
-      WorkItemIDX = true;
+  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
+    WorkItemIDX = true;
 
-    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
-      WorkItemIDY = true;
+  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
+    WorkItemIDY = true;
 
-    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
-      WorkItemIDZ = true;
-  }
+  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
+    WorkItemIDZ = true;
 
   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   bool HasStackObjects = FrameInfo.hasStackObjects();
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 79be2760e2b..de84fbbd776 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -123,12 +123,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   // unit. Minimum - first, maximum - second.
   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
 
-  // Stack object indices for work group IDs.
-  std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
-
-  // Stack object indices for work item IDs.
-  std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
-
   DenseMap<const Value *,
            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
   DenseMap<const Value *,
@@ -564,30 +558,6 @@ public:
     return WavesPerEU.second;
   }
 
-  /// \returns Stack object index for \p Dim's work group ID.
-  int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
-    assert(Dim < 3);
-    return DebuggerWorkGroupIDStackObjectIndices[Dim];
-  }
-
-  /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
-  void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
-    assert(Dim < 3);
-    DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
-  }
-
-  /// \returns Stack object index for \p Dim's work item ID.
-  int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
-    assert(Dim < 3);
-    return DebuggerWorkItemIDStackObjectIndices[Dim];
-  }
-
-  /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
-  void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
-    assert(Dim < 3);
-    DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
-  }
-
   /// \returns SGPR used for \p Dim's work group ID.
   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
     switch (Dim) {
diff --git a/lib/Target/AMDGPU/SIProgramInfo.h b/lib/Target/AMDGPU/SIProgramInfo.h
index 279067baf6c..0b475919e6d 100644
--- a/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/lib/Target/AMDGPU/SIProgramInfo.h
@@ -49,18 +49,6 @@ struct SIProgramInfo {
     // Number of VGPRs that meets number of waves per execution unit request.
     uint32_t NumVGPRsForWavesPerEU = 0;
 
-    // Fixed SGPR number used to hold wave scratch offset for entire kernel
-    // execution, or std::numeric_limits<uint16_t>::max() if the register is not
-    // used or not known.
-    uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
-        std::numeric_limits<uint16_t>::max();
-
-    // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
-    // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
-    // is not used or not known.
-    uint16_t DebuggerPrivateSegmentBufferSGPR =
-        std::numeric_limits<uint16_t>::max();
-
     // Whether there is recursion, dynamic allocas, indirect calls or some other
     // reason there may be statically unknown stack usage.
     bool DynamicCallStack = false;
diff --git a/test/CodeGen/AMDGPU/debugger-emit-prologue.ll b/test/CodeGen/AMDGPU/debugger-emit-prologue.ll
deleted file mode 100644
index b416537b9f8..00000000000
--- a/test/CodeGen/AMDGPU/debugger-emit-prologue.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-code-object-v3 -verify-machineinstrs < %s | FileCheck %s --check-prefix=NOATTR
-target datalayout = "A5"
-
-; CHECK: debug_wavefront_private_segment_offset_sgpr = [[SOFF:[0-9]+]]
-; CHECK: debug_private_segment_buffer_sgpr = [[SREG:[0-9]+]]
-
-; CHECK: v_mov_b32_e32 [[WGIDX:v[0-9]+]], s{{[0-9]+}}
-; CHECK: buffer_store_dword [[WGIDX]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]]
-; CHECK: buffer_store_dword v0, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:16
-
-; CHECK: v_mov_b32_e32 [[WGIDY:v[0-9]+]], s{{[0-9]+}}
-; CHECK: buffer_store_dword [[WGIDY]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:4
-; CHECK: buffer_store_dword v1, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:20
-
-; CHECK: v_mov_b32_e32 [[WGIDZ:v[0-9]+]], s{{[0-9]+}}
-; CHECK: buffer_store_dword [[WGIDZ]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:8
-; CHECK: buffer_store_dword v2, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:24
-
-; CHECK: DebuggerWavefrontPrivateSegmentOffsetSGPR: s[[SOFF]]
-; CHECK: DebuggerPrivateSegmentBufferSGPR: s[[SREG]]
-
-; NOATTR-NOT: DebuggerWavefrontPrivateSegmentOffsetSGPR
-; NOATTR-NOT: DebuggerPrivateSegmentBufferSGPR
-
-; Function Attrs: nounwind
-define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !12 {
-entry:
-  %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
-  store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
-  call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !17, metadata !18), !dbg !19
-  %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !20
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
-  store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
-  %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !22
-  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
-  store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23
-  %2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !24
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24
-  store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25
-  ret void, !dbg !26
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!opencl.kernels = !{!3}
-!llvm.module.flags = !{!9, !10}
-!llvm.ident = !{!11}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 269772)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
-!1 = !DIFile(filename: "test01.cl", directory: "/home/kzhuravl/Lightning/testing")
-!2 = !{}
-!3 = !{void (i32 addrspace(1)*)* @test, !4, !5, !6, !7, !8}
-!4 = !{!"kernel_arg_addr_space", i32 1}
-!5 = !{!"kernel_arg_access_qual", !"none"}
-!6 = !{!"kernel_arg_type", !"int addrspace(5)*"}
-!7 = !{!"kernel_arg_base_type", !"int addrspace(5)*"}
-!8 = !{!"kernel_arg_type_qual", !""}
-!9 = !{i32 2, !"Dwarf Version", i32 2}
-!10 = !{i32 2, !"Debug Info Version", i32 3}
-!11 = !{!"clang version 3.9.0 (trunk 269772)"}
-!12 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
-!13 = !DISubroutineType(types: !14)
-!14 = !{null, !15}
-!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32)
-!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15)
-!18 = !DIExpression()
-!19 = !DILocation(line: 1, column: 30, scope: !12)
-!20 = !DILocation(line: 2, column: 3, scope: !12)
-!21 = !DILocation(line: 2, column: 8, scope: !12)
-!22 = !DILocation(line: 3, column: 3, scope: !12)
-!23 = !DILocation(line: 3, column: 8, scope: !12)
-!24 = !DILocation(line: 4, column: 3, scope: !12)
-!25 = !DILocation(line: 4, column: 8, scope: !12)
-!26 = !DILocation(line: 5, column: 1, scope: !12)
diff --git a/test/CodeGen/AMDGPU/debugger-insert-nops.ll b/test/CodeGen/AMDGPU/debugger-insert-nops.ll
deleted file mode 100644
index f0947e49873..00000000000
--- a/test/CodeGen/AMDGPU/debugger-insert-nops.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
-; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP
-target datalayout = "A5"
-
-; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each.
-
-; Check that each line appears at least once
-; CHECK-DAG: test01.cl:2:3
-; CHECK-DAG: test01.cl:3:3
-; CHECK-DAG: test01.cl:4:3
-
-
-; Check that each of each of the lines consists of the line output, followed by "s_nop 0"
-; CHECKNOP: test01.cl:{{[234]}}:3
-; CHECKNOP-NEXT: s_nop 0
-; CHECKNOP: test01.cl:{{[234]}}:3
-; CHECKNOP-NEXT: s_nop 0
-; CHECKNOP: test01.cl:{{[234]}}:3
-; CHECKNOP-NEXT: s_nop 0
-
-; CHECK: test01.cl:5:{{[0-9]+}}
-; CHECK-NEXT: s_nop 0
-; CHECK-NEXT: s_endpgm
-
-; Function Attrs: nounwind
-define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !12 {
-entry:
-  %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
-  store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
-  call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !17, metadata !18), !dbg !19
-  %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !20
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
-  store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !20
-  %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !22
-  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
-  store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23
-  %2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !24
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24
-  store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25
-  ret void, !dbg !26
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!opencl.kernels = !{!3}
-!llvm.module.flags = !{!9, !10}
-!llvm.ident = !{!11}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
-!1 = !DIFile(filename: "test01.cl", directory: "/home/kzhuravl/Lightning/testing")
-!2 = !{}
-!3 = !{void (i32 addrspace(1)*)* @test, !4, !5, !6, !7, !8}
-!4 = !{!"kernel_arg_addr_space", i32 1}
-!5 = !{!"kernel_arg_access_qual", !"none"}
-!6 = !{!"kernel_arg_type", !"int addrspace(5)*"}
-!7 = !{!"kernel_arg_base_type", !"int addrspace(5)*"}
-!8 = !{!"kernel_arg_type_qual", !""}
-!9 = !{i32 2, !"Dwarf Version", i32 2}
-!10 = !{i32 2, !"Debug Info Version", i32 3}
-!11 = !{!"clang version 3.9.0 (trunk 268929)"}
-!12 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
-!13 = !DISubroutineType(types: !14)
-!14 = !{null, !15}
-!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32)
-!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15)
-!18 = !DIExpression()
-!19 = !DILocation(line: 1, column: 30, scope: !12)
-!20 = !DILocation(line: 2, column: 3, scope: !12)
-!21 = !DILocation(line: 2, column: 8, scope: !12)
-!22 = !DILocation(line: 3, column: 3, scope: !12)
-!23 = !DILocation(line: 3, column: 8, scope: !12)
-!24 = !DILocation(line: 4, column: 3, scope: !12)
-!25 = !DILocation(line: 4, column: 8, scope: !12)
-!26 = !DILocation(line: 5, column: 1, scope: !12)
diff --git a/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll b/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll
deleted file mode 100644
index 7eacdc1cdab..00000000000
--- a/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
-target datalayout = "A5"
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata)
-
-; CHECK: ---
-; CHECK:  Version: [ 1, 0 ]
-
-; CHECK:  Kernels:
-; CHECK:    - Name:       test
-; CHECK:      SymbolName: 'test@kd'
-; CHECK:      DebugProps:
-; CHECK:        DebuggerABIVersion:                [ 1, 0 ]
-; CHECK:        PrivateSegmentBufferSGPR:          0
-; CHECK:        WavefrontPrivateSegmentOffsetSGPR: 11
-define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 {
-entry:
-  %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
-  store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
-  call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !16, metadata !17), !dbg !18
-  %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !19
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 0, !dbg !19
-  store i32 777, i32 addrspace(1)* %arrayidx, align 4, !dbg !20
-  %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !21
-  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i64 1, !dbg !21
-  store i32 888, i32 addrspace(1)* %arrayidx1, align 4, !dbg !22
-  %2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !23
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i64 2, !dbg !23
-  store i32 999, i32 addrspace(1)* %arrayidx2, align 4, !dbg !24
-  ret void, !dbg !25
-}
-
-attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="gfx800" "target-features"="+16-bit-insts,-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops,+amdgpu-debugger-reserve-regs,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!opencl.ocl.version = !{!3}
-!llvm.module.flags = !{!4, !5}
-!llvm.ident = !{!6}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
-!1 = !DIFile(filename: "code-object-metadata-kernel-debug-props.cl", directory: "/some/random/directory")
-!2 = !{}
-!3 = !{i32 1, i32 0}
-!4 = !{i32 2, !"Dwarf Version", i32 2}
-!5 = !{i32 2, !"Debug Info Version", i32 3}
-!6 = !{!"clang version 5.0.0"}
-!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
-!8 = !DISubroutineType(types: !9)
-!9 = !{null, !10}
-!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
-!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!12 = !{i32 1}
-!13 = !{!"none"}
-!14 = !{!"int addrspace(5)*"}
-!15 = !{!""}
-!16 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
-!17 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
-!18 = !DILocation(line: 1, column: 30, scope: !7)
-!19 = !DILocation(line: 2, column: 3, scope: !7)
-!20 = !DILocation(line: 2, column: 8, scope: !7)
-!21 = !DILocation(line: 3, column: 3, scope: !7)
-!22 = !DILocation(line: 3, column: 8, scope: !7)
-!23 = !DILocation(line: 4, column: 3, scope: !7)
-!24 = !DILocation(line: 4, column: 8, scope: !7)
-!25 = !DILocation(line: 5, column: 1, scope: !7)
diff --git a/test/CodeGen/AMDGPU/syncscopes.ll b/test/CodeGen/AMDGPU/syncscopes.ll
index 83cf6d45e24..413b7654e05 100644
--- a/test/CodeGen/AMDGPU/syncscopes.ll
+++ b/test/CodeGen/AMDGPU/syncscopes.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-before=si-debugger-insert-nops < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-after=si-insert-skips < %s | FileCheck --check-prefix=GCN %s
 
 ; GCN-LABEL: name: syncscopes
 ; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out)
-- 
2.50.1