From dfdb788875e2637c2f68c19501cd97ef821a124f Mon Sep 17 00:00:00 2001 From: Alexander Timofeev Date: Fri, 2 Jun 2017 15:25:52 +0000 Subject: [PATCH] AMDGPUAnnotateUniformValue should always treat volatile loads as divergent git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304554 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- lib/Target/AMDGPU/SMInstructions.td | 1 + .../CodeGen/AMDGPU/not-scalarize-volatile-load.ll | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 76c2644867a..b48b2391110 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3571,7 +3571,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && - isMemOpHasNoClobberedMemOperand(Load)) + !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td index 5b840a14dbc..73dd8b7daa4 100644 --- a/lib/Target/AMDGPU/SMInstructions.td +++ b/lib/Target/AMDGPU/SMInstructions.td @@ -229,6 +229,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) || (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && + !Ld->isVolatile() && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; diff --git 
a/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll b/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll new file mode 100644 index 00000000000..bced3c408c5 --- /dev/null +++ b/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: @volatile_load +; GCN: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0 +; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; GCN: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} + +define amdgpu_kernel void @volatile_load(i32 addrspace(1)* %arg, i32 addrspace(1)* nocapture %arg1) { +bb: + %tmp18 = load volatile i32, i32 addrspace(1)* %arg, align 4 + %tmp26 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 5 + store i32 %tmp18, i32 addrspace(1)* %tmp26, align 4 + ret void +} -- 2.50.1