From f25266101e3a715fb377960a7cf99c4f9ec2f37e Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 25 Feb 2019 20:16:11 +0000
Subject: [PATCH] AMDGPU: Remove IntrReadMem from memtime/memrealtime intrinsics

EarlyCSE with MemorySSA was able to use this to merge multiple calls
with no intervening store.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354814 91177308-0d34-0410-b5e6-96231b3b80d8
---
NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
extraction. Line breaks and indentation below were reconstructed so that every
hunk matches the line counts in its @@ header; the exact original indentation
is not recoverable from the collapsed text. Text inside angle brackets appears
to have been stripped before this copy was made (the author e-mail on the
From: line, and presumably the parameter/argument lists on the
"multiclass SM_Pseudo_Discards", "def _SGPR : SM_Discard_Pseudo" and
"class SM_Time_Pseudo" lines), which is why the -/+ "class SM_Time_Pseudo"
pair reads identically here. Confirm those lines against upstream r354814
before applying.

 include/llvm/IR/IntrinsicsAMDGPU.td           |  4 +-
 lib/Target/AMDGPU/SMInstructions.td           | 12 +++++-
 test/Transforms/EarlyCSE/AMDGPU/lit.local.cfg |  5 +++
 .../Transforms/EarlyCSE/AMDGPU/memrealtime.ll | 43 +++++++++++++++++++
 4 files changed, 60 insertions(+), 4 deletions(-)
 create mode 100644 test/Transforms/EarlyCSE/AMDGPU/lit.local.cfg
 create mode 100644 test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll

diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 151329cce18..b7b3d725b04 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1118,7 +1118,7 @@ def int_amdgcn_s_dcache_inv :
 
 def int_amdgcn_s_memtime :
   GCCBuiltin<"__builtin_amdgcn_s_memtime">,
-  Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
+  Intrinsic<[llvm_i64_ty], []>;
 
 def int_amdgcn_s_sleep :
   GCCBuiltin<"__builtin_amdgcn_s_sleep">,
@@ -1391,7 +1391,7 @@ def int_amdgcn_s_dcache_wb_vol :
 
 def int_amdgcn_s_memrealtime :
   GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
-  Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
+  Intrinsic<[llvm_i64_ty]>;
 
 // llvm.amdgcn.ds.permute
 def int_amdgcn_ds_permute :
diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td
index 797586bbb54..fe70bffdebd 100644
--- a/lib/Target/AMDGPU/SMInstructions.td
+++ b/lib/Target/AMDGPU/SMInstructions.td
@@ -152,11 +152,19 @@ multiclass SM_Pseudo_Discards {
   def _SGPR : SM_Discard_Pseudo ;
 }
 
-class SM_Time_Pseudo : SM_Pseudo<
+class SM_Time_Pseudo : SM_Pseudo<
   opName, (outs SReg_64_XEXEC:$sdst), (ins),
   " $sdst", [(set i64:$sdst, (node))]> {
   let hasSideEffects = 1;
-  let mayStore = 0;
+
+  // FIXME: This should be definitively mayStore = 0. TableGen
+  // brokenly tries to infer these based on the intrinsic properties
+  // corresponding to the IR attributes. The target intrinsics are
+  // considered as writing to memory for IR dependency purposes, but
+  // those can be modeled with hasSideEffects here. These also end up
+  // inferring differently for llvm.readcyclecounter and the amdgcn
+  // intrinsics.
+  let mayStore = ?;
   let mayLoad = 1;
   let has_sbase = 0;
   let has_offset = 0;
diff --git a/test/Transforms/EarlyCSE/AMDGPU/lit.local.cfg b/test/Transforms/EarlyCSE/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..4536d089640
--- /dev/null
+++ b/test/Transforms/EarlyCSE/AMDGPU/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'AMDGPU' in targets:
+    config.unsupported = True
diff --git a/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll b/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll
new file mode 100644
index 00000000000..6b42ee8d71e
--- /dev/null
+++ b/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -early-cse-memssa < %s | FileCheck %s
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+
+; CHECK-LABEL: @memrealtime(
+; CHECK: call i64 @llvm.amdgcn.s.memrealtime()
+; CHECK: call i64 @llvm.amdgcn.s.memrealtime()
+define amdgpu_kernel void @memrealtime(i64 %cycles) #0 {
+entry:
+  %0 = tail call i64 @llvm.amdgcn.s.memrealtime()
+  %cmp3 = icmp sgt i64 %cycles, 0
+  br i1 %cmp3, label %while.body, label %while.end
+
+while.body:
+  %1 = tail call i64 @llvm.amdgcn.s.memrealtime()
+  %sub = sub nsw i64 %1, %0
+  %cmp = icmp slt i64 %sub, %cycles
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+  ret void
+}
+
+; CHECK-LABEL: @memtime(
+; CHECK: call i64 @llvm.amdgcn.s.memtime()
+; CHECK: call i64 @llvm.amdgcn.s.memtime()
+define amdgpu_kernel void @memtime(i64 %cycles) #0 {
+entry:
+  %0 = tail call i64 @llvm.amdgcn.s.memtime()
+  %cmp3 = icmp sgt i64 %cycles, 0
+  br i1 %cmp3, label %while.body, label %while.end
+
+while.body:
+  %1 = tail call i64 @llvm.amdgcn.s.memtime()
+  %sub = sub nsw i64 %1, %0
+  %cmp = icmp slt i64 %sub, %cycles
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+  ret void
+}
+
+declare i64 @llvm.amdgcn.s.memrealtime()
+declare i64 @llvm.amdgcn.s.memtime()
-- 
2.40.0