AMDGPU/SI: Don't emit multi-dword flat memory ops when they might access scratch

author Tom Stellard <thomas.stellard@amd.com>

Wed, 26 Oct 2016 14:38:47 +0000 (14:38 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Wed, 26 Oct 2016 14:38:47 +0000 (14:38 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Wed, 26 Oct 2016 14:38:47 +0000 (14:38 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 26 Oct 2016 14:38:47 +0000 (14:38 +0000)
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index 39486abe5e691f97cf4c2193d8be6d423addef7d..3b84e386341638c7a8c32a51bb95449791fba680 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2591,6 +2591,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
      return DAG.getMergeValues(Ops, DL);
    }
  
+  MachineFunction &MF = DAG.getMachineFunction();
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  // If there is a possibility that a flat instruction accesses scratch memory
+  // then we need to use the same legalization rules we use for private.
+  if (AS == AMDGPUAS::FLAT_ADDRESS)
+    AS = MFI->hasFlatScratchInit() ?
+         AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+
    unsigned NumElements = MemVT.getVectorNumElements();
    switch (AS) {
    case AMDGPUAS::CONSTANT_ADDRESS:
@@ -2890,6 +2898,14 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
      return expandUnalignedStore(Store, DAG);
    }
  
+  MachineFunction &MF = DAG.getMachineFunction();
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  // If there is a possibility that a flat instruction accesses scratch memory
+  // then we need to use the same legalization rules we use for private.
+  if (AS == AMDGPUAS::FLAT_ADDRESS)
+    AS = MFI->hasFlatScratchInit() ?
+         AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+
    unsigned NumElements = VT.getVectorNumElements();
    switch (AS) {
    case AMDGPUAS::GLOBAL_ADDRESS:
diff --git a/test/CodeGen/AMDGPU/flat-address-space.ll b/test/CodeGen/AMDGPU/flat-address-space.ll

index 1b6d8dc533af72b90c5c0095ee287aeb2097f824..0cfe6888b33586017169777ffc7b9f5eef37c97e 100644 (file)
--- a/test/CodeGen/AMDGPU/flat-address-space.ll
+++ b/test/CodeGen/AMDGPU/flat-address-space.ll
@@ -1,5 +1,6 @@
-; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck  %s
+; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck  %s
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=CHECK,HSA %s
  
  ; Disable optimizations in case there are optimizations added that
  ; specialize away generic pointer accesses.
@@ -149,6 +150,28 @@ define void @flat_scratch_unaligned_store() {
    ret void
  }
  
+; CHECK-LABEL: flat_scratch_multidword_load:
+; HSA: flat_load_dword
+; HSA: flat_load_dword
+; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
+define void @flat_scratch_multidword_load() {
+  %scratch = alloca <2 x i32>
+  %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
+  %ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr
+  ret void
+}
+
+; CHECK-LABEL: flat_scratch_multidword_store:
+; HSA: flat_store_dword
+; HSA: flat_store_dword
+; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
+define void @flat_scratch_multidword_store() {
+  %scratch = alloca <2 x i32>
+  %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
+  store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr
+  ret void
+}
+
  attributes #0 = { nounwind }
  attributes #1 = { nounwind convergent }
  attributes #3 = { nounwind readnone }
author	Tom Stellard <thomas.stellard@amd.com>
	Wed, 26 Oct 2016 14:38:47 +0000 (14:38 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Wed, 26 Oct 2016 14:38:47 +0000 (14:38 +0000)
lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/flat-address-space.ll		patch \| blob \| history