From: Tom Stellard <thomas.stellard@amd.com>
Date: Tue, 28 Apr 2015 21:23:00 +0000 (+0000)
Subject: Merging r231659:
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5684d399381bb7e883d29a2505ad4f5ff4c4fa22;p=llvm

Merging r231659:

------------------------------------------------------------------------
r231659 | marek.olsak | 2015-03-09 11:48:09 -0400 (Mon, 09 Mar 2015) | 4 lines

R600/SI: Limit SGPRs to 80 on Tonga and Iceland

This is a candidate for stable.

------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@236038 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 1df4448abf0..03f2bbe4820 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -97,6 +97,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
         "true",
         "Enable spilling of VGPRs to scratch memory">;
 
+def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
+        "SGPRInitBug",
+        "true",
+        "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
+
 class SubtargetFeatureFetchLimit <string Value> :
                           SubtargetFeature <"fetch"#Value,
         "TexVTXClauseSize",
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 6185e367ff5..1fae26e18a4 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -343,6 +343,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.NumVGPR = MaxVGPR + 1;
   ProgInfo.NumSGPR = MaxSGPR + 1;
 
+  if (STM.hasSGPRInitBug()) {
+    if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
+      llvm_unreachable("Too many SGPRs used with the SGPR init bug");
+
+    ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+  }
+
   ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
   ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
   // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index b1c7498fc14..87cdb5f8db8 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -80,7 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
       FlatAddressSpace(false), EnableIRStructurizer(true),
       EnablePromoteAlloca(false), EnableIfCvt(true),
       EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
-      EnableVGPRSpilling(false),
+      EnableVGPRSpilling(false),SGPRInitBug(false),
       DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
       FrameLowering(TargetFrameLowering::StackGrowsUp,
                     64 * 16, // Maximum stack alignment (long16)
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 566b45c1dcc..eeb41d3ec7f 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -45,6 +45,10 @@ public:
     VOLCANIC_ISLANDS,
   };
 
+  enum {
+    FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+  };
+
 private:
   std::string DevName;
   bool Is64bit;
@@ -66,6 +70,7 @@ private:
   bool CFALUBug;
   int LocalMemorySize;
   bool EnableVGPRSpilling;
+  bool SGPRInitBug;
 
   const DataLayout DL;
   AMDGPUFrameLowering FrameLowering;
@@ -203,6 +208,10 @@ public:
     return LocalMemorySize;
   }
 
+  bool hasSGPRInitBug() const {
+    return SGPRInitBug;
+  }
+
   unsigned getAmdKernelCodeChipID() const;
 
   bool enableMachineScheduler() const override {
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index 659301651c1..e5fef0c1d03 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -113,8 +113,12 @@ def : ProcessorModel<"mullins",    SIQuarterSpeedModel, [FeatureSeaIslands]>;
 // Volcanic Islands
 //===----------------------------------------------------------------------===//
 
-def : ProcessorModel<"tonga",   SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"tonga",   SIQuarterSpeedModel,
+  [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
 
-def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"iceland", SIQuarterSpeedModel,
+  [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
 
 def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 160ff655bad..a2c536c6942 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -46,6 +46,23 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(AMDGPU::VGPR255);
   Reserved.set(AMDGPU::VGPR254);
 
+  // Tonga and Iceland can only allocate a fixed number of SGPRs due
+  // to a hw bug.
+  if (ST.hasSGPRInitBug()) {
+    unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+    // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
+    // Assume XNACK_MASK is unused.
+    unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
+
+    for (unsigned i = Limit; i < NumSGPRs; ++i) {
+      unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
+      MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
+
+      for (; R.isValid(); ++R)
+        Reserved.set(*R);
+    }
+  }
+
   return Reserved;
 }
 
diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
index aca3109f424..f801b3f5735 100644
--- a/test/CodeGen/R600/elf.ll
+++ b/test/CodeGen/R600/elf.ll
@@ -1,7 +1,9 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s
+; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
 
 ; ELF: Format: ELF32
 ; ELF: Name: .AMDGPU.config
@@ -15,7 +17,8 @@
 ; CONFIG: test:
 ; CONFIG: .section .AMDGPU.config
 ; CONFIG-NEXT: .long   45096
-; CONFIG-NEXT: .long   0
+; TYPICAL-NEXT: .long   0
+; TONGA-NEXT: .long   576
 define void @test(i32 %p) #0 {
    %i = add i32 %p, 2
    %r = bitcast i32 %i to float