Merging r260651:

author Tom Stellard <thomas.stellard@amd.com>

Fri, 3 Jun 2016 15:58:20 +0000 (15:58 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Fri, 3 Jun 2016 15:58:20 +0000 (15:58 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Fri, 3 Jun 2016 15:58:20 +0000 (15:58 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 3 Jun 2016 15:58:20 +0000 (15:58 +0000)
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td

index e7ba2bee1817d46d9fbf2b00e24e6208da63649f..df8b5b5bc8bdec15ce76d85898662d2560e12323 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -206,6 +206,17 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
          "true",
          "Additional intstructions for CI+">;
  
+class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
+  "max-private-element-size-"#size,
+  "MaxPrivateElementSize",
+  !cast<string>(size),
+  "Maximum private access size may be "#size
+>;
+
+def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
+def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
+def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
+
  // Dummy feature used to disable assembler instructions.
  def FeatureDisable : SubtargetFeature<"",
                                        "FeatureDisable","true",
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

index 1239dfb235efa805016eb84f26dfd461052d6a76..89c090b93164598a8d597265b7c809ecce88cec6 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -593,6 +593,20 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
    }
  }
  
+// This is supposed to be log2(Size) - 1 (TODO confirm against the enum).
+static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
+  switch (Size) {
+  case 4:
+    return AMD_ELEMENT_4_BYTES;
+  case 8:
+    return AMD_ELEMENT_8_BYTES;
+  case 16:
+    return AMD_ELEMENT_16_BYTES;
+  default:
+    llvm_unreachable("invalid private_element_size");
+  }
+}
+
  void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
                                           const SIProgramInfo &KernelInfo) const {
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -606,6 +620,11 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
        (KernelInfo.ComputePGMRSrc2 << 32);
    header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
  
+
+  AMD_HSA_BITS_SET(header.code_properties,
+                   AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
+                   getElementByteSizeValue(STM.getMaxPrivateElementSize()));
+
    if (MFI->hasPrivateSegmentBuffer()) {
      header.code_properties |=
        AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

index 9dd86a25b2d6dd1bdaef07bc2dc70fc542e4f4f3..88fe89eadc1480f2f66fb65724144535b291d905 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -58,6 +58,11 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
      FP32Denormals = false;
      FP64Denormals = false;
    }
+
+  // Set defaults if needed.
+  if (MaxPrivateElementSize == 0)
+    MaxPrivateElementSize = 16;
+
    return *this;
  }
  
@@ -72,6 +77,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
        EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
        EnableXNACK(false),
        WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+      MaxPrivateElementSize(0),
        EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
        GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
        IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h

index 441bfab40899c1ddbf7acb48d8ea2b5d3f69e854..e36a27779a46294725365fbad0eb5f8b420a9720 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -79,6 +79,7 @@ private:
    unsigned WavefrontSize;
    bool CFALUBug;
    int LocalMemorySize;
+  unsigned MaxPrivateElementSize;
    bool EnableVGPRSpilling;
    bool SGPRInitBug;
    bool IsGCN;
@@ -243,6 +244,10 @@ public:
      return LocalMemorySize;
    }
  
+  unsigned getMaxPrivateElementSize() const {
+    return MaxPrivateElementSize;
+  }
+
    bool hasSGPRInitBug() const {
      return SGPRInitBug;
    }
diff --git a/lib/Target/AMDGPU/AMDKernelCodeT.h b/lib/Target/AMDGPU/AMDKernelCodeT.h

index a9ba60c8cbad02498df07bb8d880784df071bf8b..425261c15f91250a8c13f249af51b981357257c4 100644 (file)
--- a/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -44,6 +44,15 @@ enum amd_code_version_t {
    AMD_CODE_VERSION_MINOR = 1
  };
  
+// Sets val bits for mask in packed dst. NOTE: expands to two statements.
+#define AMD_HSA_BITS_SET(dst, mask, val)                                       \
+  dst &= (~(1 << mask ## _SHIFT) & ~mask);                                     \
+  dst |= (((val) << mask ## _SHIFT) & mask)
+
+// Gets the bits selected by mask from packed src, shifted by mask##_SHIFT.
+#define AMD_HSA_BITS_GET(src, mask)                                            \
+  ((src & mask) >> mask ## _SHIFT)                                             \
+
  /// The values used to define the number of bytes to use for the
  /// swizzle element size.
  enum amd_element_byte_size_t {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp

index cadfd2428d28867720d363bd66a3532b3618faf8..c56835dc5550a17251cda7983fbdd5717b3571de 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3120,6 +3120,10 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
                      AMDGPU::RSRC_TID_ENABLE |
                      0xffffffff; // Size;
  
+  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
+
+  Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
+
    // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
    // Clear them unless we want a huge stride.
    if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h

index 099eeb256e15df8a06c298198eb35511d1fb4c4c..1f3aec3aff199e0e94a5698c1f3fea35b6b76cf4 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -498,7 +498,7 @@ namespace AMDGPU {
  
    const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
    const uint64_t RSRC_TID_ENABLE = 1LL << 55;
-
+  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = 51;
  } // End namespace AMDGPU
  
  namespace SI {
diff --git a/test/CodeGen/AMDGPU/large-alloca-compute.ll b/test/CodeGen/AMDGPU/large-alloca-compute.ll

index 84380b4210514264444dc69fd7368ee98c00c83d..da40c8593e90dae651f3dabd15c0fccefb7f94c5 100644 (file)
--- a/test/CodeGen/AMDGPU/large-alloca-compute.ll
+++ b/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -10,8 +10,8 @@
  ; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
  ; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
  ; GCN: s_mov_b32 s10, -1
-; CI: s_mov_b32 s11, 0x80f000
-; VI: s_mov_b32 s11, 0x800000
+; CI: s_mov_b32 s11, 0x98f000
+; VI: s_mov_b32 s11, 0x980000
  
  
  ; GCNHSA: .amd_kernel_code_t
diff --git a/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/test/CodeGen/AMDGPU/large-alloca-graphics.ll

index b6f8093313cb764e7a2f0388fa71361ba0bb32c6..7e48b16e52343db44c1db1520c5f5d1922cee5d4 100644 (file)
--- a/test/CodeGen/AMDGPU/large-alloca-graphics.ll
+++ b/test/CodeGen/AMDGPU/large-alloca-graphics.ll
@@ -5,8 +5,8 @@
  ; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
  ; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
  ; GCN: s_mov_b32 s10, -1
-; CI: s_mov_b32 s11, 0x80f000
-; VI: s_mov_b32 s11, 0x800000
+; CI: s_mov_b32 s11, 0x98f000
+; VI: s_mov_b32 s11, 0x980000
  
  ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
  ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
@@ -26,8 +26,8 @@ define void @large_alloca_pixel_shader(i32 %x, i32 %y) #1 {
  ; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
  ; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
  ; GCN: s_mov_b32 s10, -1
-; CI: s_mov_b32 s11, 0x80f000
-; VI: s_mov_b32 s11, 0x800000
+; CI: s_mov_b32 s11, 0x98f000
+; VI: s_mov_b32 s11, 0x980000
  
  ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
  ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll

index cd7c78f408ddde3eac65a5a0b2e72b135ed9a37b..5db3161d8fdebd7521eb9e189596e72e0c30ac07 100644 (file)
--- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -21,8 +21,8 @@ declare i32 @llvm.r600.read.tgid.z() #1
  ; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
  ; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
  ; GCN-NEXT: s_mov_b32 s14, -1
-; SI-NEXT: s_mov_b32 s15, 0x80f000
-; VI-NEXT: s_mov_b32 s15, 0x800000
+; SI-NEXT: s_mov_b32 s15, 0x98f000
+; VI-NEXT: s_mov_b32 s15, 0x980000
  
  
  ; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll

index 16abb89bb0b80b59abd2a2a63e0d83db4c452bb0..eeecf6d23987bd959df6af5a3ef23bf6eca3dacf 100644 (file)
--- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -14,8 +14,8 @@
  ; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
  ; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
  ; GCN-NEXT: s_mov_b32 s14, -1
-; SI-NEXT: s_mov_b32 s15, 0x80f000
-; VI-NEXT: s_mov_b32 s15, 0x800000
+; SI-NEXT: s_mov_b32 s15, 0x98f000
+; VI-NEXT: s_mov_b32 s15, 0x980000
  
  ; s12 is offset user SGPR
  ; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Spill
author	Tom Stellard <thomas.stellard@amd.com>
	Fri, 3 Jun 2016 15:58:20 +0000 (15:58 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Fri, 3 Jun 2016 15:58:20 +0000 (15:58 +0000)
lib/Target/AMDGPU/AMDGPU.td		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUSubtarget.cpp		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUSubtarget.h		patch \| blob \| history
lib/Target/AMDGPU/AMDKernelCodeT.h		patch \| blob \| history
lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
lib/Target/AMDGPU/SIInstrInfo.h		patch \| blob \| history
test/CodeGen/AMDGPU/large-alloca-compute.ll		patch \| blob \| history
test/CodeGen/AMDGPU/large-alloca-graphics.ll		patch \| blob \| history
test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll		patch \| blob \| history
test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll		patch \| blob \| history