If a workgroup size is known to be not greater than the wavefront size,
the s_barrier instruction is not needed, since all threads are guaranteed
to reach the same point at the same time.
Differential Revision: https://reviews.llvm.org/D31731
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299659 91177308-0d34-0410-b5e6-96231b3b80d8
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
}
+ // Lower llvm.amdgcn.s.barrier: when optimizing and the kernel's maximum
+ // flat workgroup size is known to be no larger than the wavefront size,
+ // the whole workgroup fits in a single wave, so the hardware s_barrier is
+ // unnecessary (per the commit rationale, all threads reach the barrier
+ // together). Emit a WAVE_BARRIER pseudo instead, which produces no real
+ // instruction — only a "; wave barrier" comment in the assembly (see the
+ // accompanying test's CHECK lines).
+ case Intrinsic::amdgcn_s_barrier: {
+ if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ // .second is the upper bound of the flat workgroup size range — matches
+ // the "min,max" form of the "amdgpu-flat-work-group-size" attribute
+ // exercised by the test ("32,64" elides the barrier, "32,128" does not).
+ unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
+ if (WGSize <= ST.getWavefrontSize())
+ // Thread the chain operand (operand 0) through so scheduling/ordering
+ // around the barrier is preserved.
+ return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
+ Op.getOperand(0)), 0);
+ }
+ // Empty SDValue: defer to the default lowering for the intrinsic,
+ // i.e. a real s_barrier is still emitted.
+ return SDValue();
+ }; // NOTE(review): stray ';' after the case block — a harmless empty
+    // statement, but it should be dropped.
default:
return Op;
}
--- /dev/null
+; Tests for s_barrier elision: when the kernel's maximum flat workgroup
+; size is no larger than the wavefront size, the barrier is replaced by a
+; wave-barrier pseudo that emits no instruction (only an asm comment).
+; RUN: llc -march=amdgcn < %s | FileCheck %s
+
+; No workgroup-size attribute: the upper bound is unknown, so the real
+; barrier must be kept.
+; CHECK-LABEL: {{^}}unknown_wgs:
+; CHECK: s_barrier
+define amdgpu_kernel void @unknown_wgs() {
+ tail call void @llvm.amdgcn.s.barrier() #0
+ ret void
+}
+
+; Maximum workgroup size 128 exceeds the wavefront size, so the real
+; barrier must be kept.
+; CHECK-LABEL: {{^}}flat_wgs_attr_32_128:
+; CHECK: s_barrier
+define amdgpu_kernel void @flat_wgs_attr_32_128() #1 {
+ tail call void @llvm.amdgcn.s.barrier() #0
+ ret void
+}
+
+; Maximum workgroup size 64 fits in one wavefront: the barrier is elided.
+; The bare ":" matches the end of the label line, and the NEXT checks pin
+; that nothing but the wave-barrier comment precedes s_endpgm.
+; CHECK-LABEL: {{^}}flat_wgs_attr_32_64:
+; CHECK: :
+; CHECK-NEXT: ; wave barrier
+; CHECK-NEXT: s_endpgm
+define amdgpu_kernel void @flat_wgs_attr_32_64() #2 {
+ tail call void @llvm.amdgcn.s.barrier() #0
+ ret void
+}
+
+declare void @llvm.amdgcn.s.barrier() #0
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { nounwind "amdgpu-flat-work-group-size"="32,128" }
+attributes #2 = { nounwind "amdgpu-flat-work-group-size"="32,64" }
}
attributes #0 = { convergent nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,64" }
+attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" }