AMDGPU/SI: Set INDEX_STRIDE for scratch coalescing

author Marek Olsak <marek.olsak@amd.com>

Mon, 13 Jun 2016 16:05:57 +0000 (16:05 +0000)

committer Marek Olsak <marek.olsak@amd.com>

Mon, 13 Jun 2016 16:05:57 +0000 (16:05 +0000)
author Marek Olsak <marek.olsak@amd.com>
Mon, 13 Jun 2016 16:05:57 +0000 (16:05 +0000)
committer Marek Olsak <marek.olsak@amd.com>
Mon, 13 Jun 2016 16:05:57 +0000 (16:05 +0000)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp

index 24add83e56983875872a0f7a3a63e05d07506882..af246c07bf73fe2d199874567063304b02487790 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3095,7 +3095,9 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
  
    uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
  
-  Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
+  Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
+            // IndexStride = 64
+            (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
  
    // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
    // Clear them unless we want a huge stride.
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h

index ce1aa2871e772462ee6e5f53cff0b93b0377666b..a20b817888859ab81a5c8e36fe63b8b1e3e7ec23 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -547,8 +547,9 @@ namespace AMDGPU {
    int getAtomicNoRetOp(uint16_t Opcode);
  
    const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
-  const uint64_t RSRC_TID_ENABLE = 1LL << 55;
-  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = 51;
+  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
+  const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
+  const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
  } // End namespace AMDGPU
  
  namespace SI {
diff --git a/test/CodeGen/AMDGPU/large-alloca-compute.ll b/test/CodeGen/AMDGPU/large-alloca-compute.ll

index 5a75cf80da0a26fd5968942980c57f16111b539f..0a947328221d01b492e070f0b7553446d6100035 100644 (file)
--- a/test/CodeGen/AMDGPU/large-alloca-compute.ll
+++ b/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -12,8 +12,8 @@
  ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
  ; GCN-DAG: ; fixup A - offset: 4, value: SCRATCH_RSRC_DWORD1, kind: FK_Data_4
  ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1
-; CI-DAG: s_mov_b32 s{{[0-9]+}}, 0x88f000
-; VI-DAG: s_mov_b32 s{{[0-9]+}}, 0x880000
+; CI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe8f000
+; VI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe80000
  
  
  ; GCNHSA: .amd_kernel_code_t
diff --git a/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/test/CodeGen/AMDGPU/large-alloca-graphics.ll

index 2bb67ca0c7ad129a3afcbee0cf54bcc0dfbb5510..fb0e15eb0cb9f23592196d2b573974d31cedb01e 100644 (file)
--- a/test/CodeGen/AMDGPU/large-alloca-graphics.ll
+++ b/test/CodeGen/AMDGPU/large-alloca-graphics.ll
@@ -5,8 +5,8 @@
  ; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
  ; GCN-DAG: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
  ; GCN-DAG: s_mov_b32 s10, -1
-; CI-DAG: s_mov_b32 s11, 0x88f000
-; VI-DAG: s_mov_b32 s11, 0x880000
+; CI-DAG: s_mov_b32 s11, 0xe8f000
+; VI-DAG: s_mov_b32 s11, 0xe80000
  
  ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
  ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
@@ -26,8 +26,8 @@ define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
  ; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
  ; GCN-DAG: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
  ; GCN-DAG: s_mov_b32 s10, -1
-; CI-DAG: s_mov_b32 s11, 0x88f000
-; VI-DAG: s_mov_b32 s11, 0x880000
+; CI-DAG: s_mov_b32 s11, 0xe8f000
+; VI-DAG: s_mov_b32 s11, 0xe80000
  
  ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
  ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll

index 190ad62d0f36ae7be52c72159fadf51669aeea48..6c33bc98c6052cdfe5ba77596bc7ac3b126ab3d3 100644 (file)
--- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -23,8 +23,8 @@
  ; GCNMESA-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
  ; GCNMESA--DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
  ; GCNMESA-DAG: s_mov_b32 s14, -1
-; SIMESA-DAG: s_mov_b32 s15, 0x88f000
-; VIMESA-DAG: s_mov_b32 s15, 0x880000
+; SIMESA-DAG: s_mov_b32 s15, 0xe8f000
+; VIMESA-DAG: s_mov_b32 s15, 0xe80000
  
  
  ; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll

index 82b10ad01845bbf083add022ab96d62ef6b09afc..94d9e3ebf567ed0cefcebd68dd5f87c4e329b2e5 100644 (file)
--- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -16,8 +16,8 @@
  ; GCN-DAG: s_mov_b32 s16, SCRATCH_RSRC_DWORD0
  ; GCN-DAG: s_mov_b32 s17, SCRATCH_RSRC_DWORD1
  ; GCN-DAG: s_mov_b32 s18, -1
-; SI-DAG: s_mov_b32 s19, 0x88f000
-; VI-DAG: s_mov_b32 s19, 0x880000
+; SI-DAG: s_mov_b32 s19, 0xe8f000
+; VI-DAG: s_mov_b32 s19, 0xe80000
  
  ; s13 is offset system SGPR
  ; GCN: buffer_store_dword {{v[0-9]+}}, off, s[16:19], s13 offset:{{[0-9]+}} ; 16-byte Folded Spill
author	Marek Olsak <marek.olsak@amd.com>
	Mon, 13 Jun 2016 16:05:57 +0000 (16:05 +0000)
committer	Marek Olsak <marek.olsak@amd.com>
	Mon, 13 Jun 2016 16:05:57 +0000 (16:05 +0000)
lib/Target/AMDGPU/SIInstrInfo.cpp		patch | blob | history
lib/Target/AMDGPU/SIInstrInfo.h		patch | blob | history
test/CodeGen/AMDGPU/large-alloca-compute.ll		patch | blob | history
test/CodeGen/AMDGPU/large-alloca-graphics.ll		patch | blob | history
test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll		patch | blob | history
test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll		patch | blob | history