granicus.if.org Git - llvm/commitdiff
AMDGPU: Insert wait at start of callee functions
author: Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 11 Apr 2017 22:29:31 +0000 (22:29 +0000)
committer: Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 11 Apr 2017 22:29:31 +0000 (22:29 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300000 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/SIInsertWaits.cpp
test/CodeGen/AMDGPU/hsa-func.ll
test/CodeGen/AMDGPU/insert-waits-callee.mir [new file with mode: 0644]

index b14ef3ad3618433e015c0acd3f40029c77750051..47257ce16ceb3367b88f3a4ab28a1c4cc29d139d 100644 (file)
@@ -690,5 +690,19 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
   for (MachineInstr *I : RemoveMI)
     I->eraseFromParent();
 
+  if (!MFI->isEntryFunction()) {
+    // Wait for any outstanding memory operations that the input registers may
+    // depend on. We can't track them and it's better to do the wait after the
+    // costly call sequence.
+
+    // TODO: Could insert earlier and schedule more liberally with operations
+    // that only use caller preserved registers.
+    MachineBasicBlock &EntryBB = MF.front();
+    BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+      .addImm(0);
+
+    Changes = true;
+  }
+
   return Changes;
 }
index d9662b69b1260db5e4ad23965941af4f931d0b02..b4cdd4030d86a79d8d91747f6b13bbd0ca28ff08 100644 (file)
@@ -26,7 +26,7 @@
 
 ; ELF: Symbol {
 ; ELF: Name: simple
-; ELF: Size: 288
+; ELF: Size: 292
 ; ELF: Type: Function (0x2)
 ; ELF: }
 
diff --git a/test/CodeGen/AMDGPU/insert-waits-callee.mir b/test/CodeGen/AMDGPU/insert-waits-callee.mir
new file mode 100644 (file)
index 0000000..ad7cd0c
--- /dev/null
@@ -0,0 +1,25 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
+--- |
+  define float @entry_callee_wait(float %arg) #0 {
+    ret float %arg
+  }
+
+  attributes #0 = { nounwind }
+...
+---
+# CHECK-LABEL: name: entry_callee_wait{{$}}
+# CHECK: bb.0:
+# CHECK-NEXT: S_WAITCNT 0{{$}}
+# CHECK-NEXT: V_ADD_F32
+# CHECK-NEXT: S_SETPC_B64
+liveins:
+  - { reg: '%sgpr0_sgpr1' }
+  - { reg: '%vgpr0' }
+
+name: entry_callee_wait
+body:             |
+  bb.0:
+    %vgpr0 = V_ADD_F32_e32 %vgpr0, %vgpr0, implicit %exec
+    S_SETPC_B64 killed %sgpr0_sgpr1
+
+...