From 09680ebbcf75946252117f40aedb6e2e27ef422e Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Sat, 26 Jan 2019 00:28:32 +0000 Subject: [PATCH] [NVPTX] Some nvvm.read.ptx.sreg intrinsics should have IntrInaccessibleMemOnly attribute. These intrinsics may return different values every time they are called and should not be CSE'd. IntrInaccessibleMemOnly appears to be the right attribute to model this behavior. Differential Revision: https://reviews.llvm.org/D57259 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352256 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsNVVM.td | 22 +++++++++++------ test/CodeGen/NVPTX/intrinsics.ll | 41 +++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index d5fda2e3142..cf072c70eba 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -3673,11 +3673,19 @@ multiclass PTXReadSRegIntrinsic_v4i32 { class PTXReadSRegIntrinsic_r32 : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; - class PTXReadSRegIntrinsic_r64 : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; +// Intrinsics to read registers with non-constant values. I.e. the values that +// do change over the kernel lifetime. Such reads should not be CSE'd. 
+class PTXReadNCSRegIntrinsic_r32 + : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>, + GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; +class PTXReadNCSRegIntrinsic_r64 + : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>, + GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; + defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">; defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">; @@ -3703,13 +3711,13 @@ def int_nvvm_read_ptx_sreg_lanemask_ge : def int_nvvm_read_ptx_sreg_lanemask_gt : PTXReadSRegIntrinsic_r32<"lanemask_gt">; -def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">; -def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">; +def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">; +def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">; -def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">; -def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">; -def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">; -def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">; +def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">; +def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">; +def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">; +def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">; def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">; diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll index 668de8a994b..4abbfcbd518 100644 --- a/test/CodeGen/NVPTX/intrinsics.ll +++ b/test/CodeGen/NVPTX/intrinsics.ll @@ -94,6 +94,43 @@ define i32 @test_popc16_to_32(i16 %a) { ret i32 %zext } +; Most nvvm.read.ptx.sreg.* intrinsics always return the same value and may +; be CSE'd. 
+; CHECK-LABEL: test_tid +define i32 @test_tid() { +; CHECK: mov.u32 %r{{.*}}, %tid.x; + %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +; CHECK-NOT: mov.u32 %r{{.*}}, %tid.x; + %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + %ret = add i32 %a, %b +; CHECK: ret + ret i32 %ret +} + +; Reading clock() or clock64() should not be CSE'd as each read may return +; a different value. +; CHECK-LABEL: test_clock +define i32 @test_clock() { +; CHECK: mov.u32 %r{{.*}}, %clock; + %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock() +; CHECK: mov.u32 %r{{.*}}, %clock; + %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock() + %ret = add i32 %a, %b +; CHECK: ret + ret i32 %ret +} + +; CHECK-LABEL: test_clock64 +define i64 @test_clock64() { +; CHECK: mov.u64 %r{{.*}}, %clock64; + %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64() +; CHECK: mov.u64 %r{{.*}}, %clock64; + %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64() + %ret = add i64 %a, %b +; CHECK: ret + ret i64 %ret +} + declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) declare float @llvm.nvvm.sqrt.f(float) @@ -103,3 +140,7 @@ declare i64 @llvm.bitreverse.i64(i64) declare i16 @llvm.ctpop.i16(i16) declare i32 @llvm.ctpop.i32(i32) declare i64 @llvm.ctpop.i64(i64) + +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() +declare i32 @llvm.nvvm.read.ptx.sreg.clock() +declare i64 @llvm.nvvm.read.ptx.sreg.clock64() -- 2.50.1