From: Jan Vesely Date: Sun, 10 Jul 2016 22:38:04 +0000 (+0000) Subject: AMDGPU: Export workitem builtins X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=72c8091697858422eefc75c89842fb246da2cb59;p=clang AMDGPU: Export workitem builtins Reviewers: tstellardAMD Differential Revision: http://reviews.llvm.org/D20299 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@275030 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/BuiltinsAMDGPU.def b/include/clang/Basic/BuiltinsAMDGPU.def index 1ebd9fe7f1..0c0b8865f7 100644 --- a/include/clang/Basic/BuiltinsAMDGPU.def +++ b/include/clang/Basic/BuiltinsAMDGPU.def @@ -17,6 +17,20 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif +//===----------------------------------------------------------------------===// +// SI+ only builtins. +//===----------------------------------------------------------------------===// + +BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*2", "nc") +BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*2", "nc") + +BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc") +BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc") +BUILTIN(__builtin_amdgcn_workgroup_id_z, "Ui", "nc") + +BUILTIN(__builtin_amdgcn_workitem_id_x, "Ui", "nc") +BUILTIN(__builtin_amdgcn_workitem_id_y, "Ui", "nc") +BUILTIN(__builtin_amdgcn_workitem_id_z, "Ui", "nc") //===----------------------------------------------------------------------===// // Instruction builtins. @@ -67,6 +81,20 @@ TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime") //===----------------------------------------------------------------------===// BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc") +//===----------------------------------------------------------------------===// +// R600-NI only builtins. +//===----------------------------------------------------------------------===// + +BUILTIN(__builtin_r600_implicitarg_ptr, "Uc*7", "nc") + +BUILTIN(__builtin_r600_read_tgid_x, "Ui", "nc") +BUILTIN(__builtin_r600_read_tgid_y, "Ui", "nc") +BUILTIN(__builtin_r600_read_tgid_z, "Ui", "nc") + +BUILTIN(__builtin_r600_read_tidig_x, "Ui", "nc") +BUILTIN(__builtin_r600_read_tidig_y, "Ui", "nc") +BUILTIN(__builtin_r600_read_tidig_z, "Ui", "nc") + //===----------------------------------------------------------------------===// // Legacy names with amdgpu prefix //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 2258957988..42fa8ae79f 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include using namespace clang; @@ -331,6 +332,17 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, return CGF.Builder.CreateExtractValue(Tmp, 0); } +static Value *emitRangedBuiltin(CodeGenFunction &CGF, + unsigned IntrinsicID, + int low, int high) { + llvm::MDBuilder MDHelper(CGF.getLLVMContext()); + llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + llvm::Instruction *Call = CGF.Builder.CreateCall(F); + Call->setMetadata(llvm::LLVMContext::MD_range, RNode); + return Call; +} + namespace { struct WidthAndSignedness { unsigned Width; @@ -7670,6 +7682,22 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); } + + // amdgcn workitem + case AMDGPU::BI__builtin_amdgcn_workitem_id_x: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); + case AMDGPU::BI__builtin_amdgcn_workitem_id_y: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); + case AMDGPU::BI__builtin_amdgcn_workitem_id_z: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); + + // r600 workitem + case AMDGPU::BI__builtin_r600_read_tidig_x: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); + case AMDGPU::BI__builtin_r600_read_tidig_y: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); + case AMDGPU::BI__builtin_r600_read_tidig_z: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); default: return nullptr; } diff --git a/test/CodeGenOpenCL/builtins-amdgcn.cl b/test/CodeGenOpenCL/builtins-amdgcn.cl index 641bcb2d20..ce463ae9d6 100644 --- a/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -291,6 +291,49 @@ void test_legacy_ldexp_f64(global double* out, double a, int b) *out = __builtin_amdgpu_ldexp(a, b); } +// CHECK-LABEL: @test_kernarg_segment_ptr +// CHECK: call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() +void test_kernarg_segment_ptr(__attribute__((address_space(2))) unsigned char ** out) +{ + *out = __builtin_amdgcn_kernarg_segment_ptr(); +} + +// CHECK-LABEL: @test_implicitarg_ptr +// CHECK: call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() +void test_implicitarg_ptr(__attribute__((address_space(2))) unsigned char ** out) +{ + *out = __builtin_amdgcn_implicitarg_ptr(); +} + +// CHECK-LABEL: @test_get_group_id( +// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.x() +// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.y() +// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.z() +void test_get_group_id(int d, global int *out) +{ + switch (d) { + case 0: *out = __builtin_amdgcn_workgroup_id_x(); break; + case 1: *out = __builtin_amdgcn_workgroup_id_y(); break; + case 2: *out = __builtin_amdgcn_workgroup_id_z(); break; + default: *out = 0; + } +} + +// CHECK-LABEL: @test_get_local_id( +// CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[WI_RANGE:![0-9]*]] +// CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[WI_RANGE]] +// CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[WI_RANGE]] +void test_get_local_id(int d, global int *out) +{ + switch (d) { + case 0: *out = __builtin_amdgcn_workitem_id_x(); break; + case 1: *out = __builtin_amdgcn_workitem_id_y(); break; + case 2: *out = __builtin_amdgcn_workitem_id_z(); break; + default: *out = 0; + } +} + +// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent } -// CHECK: ![[EXEC]] = !{!"exec"} +// CHECK-DAG: ![[EXEC]] = !{!"exec"} diff --git a/test/CodeGenOpenCL/builtins-r600.cl b/test/CodeGenOpenCL/builtins-r600.cl index 9ebcb1fe9d..0af663e6db 100644 --- a/test/CodeGenOpenCL/builtins-r600.cl +++ b/test/CodeGenOpenCL/builtins-r600.cl @@ -32,3 +32,40 @@ void test_legacy_ldexp_f64(global double* out, double a, int b) *out = __builtin_amdgpu_ldexp(a, b); } #endif + +// CHECK-LABEL: @test_implicitarg_ptr +// CHECK: call i8 addrspace(7)* @llvm.r600.implicitarg.ptr() +void test_implicitarg_ptr(__attribute__((address_space(7))) unsigned char ** out) +{ + *out = __builtin_r600_implicitarg_ptr(); +} + +// CHECK-LABEL: @test_get_group_id( +// CHECK: tail call i32 @llvm.r600.read.tgid.x() +// CHECK: tail call i32 @llvm.r600.read.tgid.y() +// CHECK: tail call i32 @llvm.r600.read.tgid.z() +void test_get_group_id(int d, global int *out) +{ + switch (d) { + case 0: *out = __builtin_r600_read_tgid_x(); break; + case 1: *out = __builtin_r600_read_tgid_y(); break; + case 2: *out = __builtin_r600_read_tgid_z(); break; + default: *out = 0; + } +} + +// CHECK-LABEL: @test_get_local_id( +// CHECK: tail call i32 @llvm.r600.read.tidig.x(), !range [[WI_RANGE:![0-9]*]] +// CHECK: tail call i32 @llvm.r600.read.tidig.y(), !range [[WI_RANGE]] +// CHECK: tail call i32 @llvm.r600.read.tidig.z(), !range [[WI_RANGE]] +void test_get_local_id(int d, global int *out) +{ + switch (d) { + case 0: *out = __builtin_r600_read_tidig_x(); break; + case 1: *out = __builtin_r600_read_tidig_y(); break; + case 2: *out = __builtin_r600_read_tidig_z(); break; + default: *out = 0; + } +} + +// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024}