From: Yaxun Liu Date: Fri, 10 Mar 2017 01:30:46 +0000 (+0000) Subject: [AMDGPU] Add builtin functions readlane ds_permute mov_dpp X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7679a84815fd5a48f3e3d4e4c2a28882ec4d6574;p=clang [AMDGPU] Add builtin functions readlane ds_permute mov_dpp Differential Revision: https://reviews.llvm.org/D30551 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@297436 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/BuiltinsAMDGPU.def b/include/clang/Basic/BuiltinsAMDGPU.def index 1548277548..a8ab657c37 100644 --- a/include/clang/Basic/BuiltinsAMDGPU.def +++ b/include/clang/Basic/BuiltinsAMDGPU.def @@ -86,6 +86,10 @@ BUILTIN(__builtin_amdgcn_sicmpl, "LUiLiLiIi", "nc") BUILTIN(__builtin_amdgcn_fcmp, "LUiddIi", "nc") BUILTIN(__builtin_amdgcn_fcmpf, "LUiffIi", "nc") BUILTIN(__builtin_amdgcn_ds_swizzle, "iiIi", "nc") +BUILTIN(__builtin_amdgcn_ds_permute, "iii", "nc") +BUILTIN(__builtin_amdgcn_ds_bpermute, "iii", "nc") +BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc") +BUILTIN(__builtin_amdgcn_readlane, "iii", "nc") BUILTIN(__builtin_amdgcn_fmed3f, "ffff", "nc") //===----------------------------------------------------------------------===// @@ -103,6 +107,7 @@ TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "sh", "nc", "16-bit-insts") TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts") TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts") TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime") +TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp") //===----------------------------------------------------------------------===// // GFX9+ only builtins. diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index 6b0b9100b3..e23ff3cc38 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -2387,6 +2387,7 @@ bool AMDGPUTargetInfo::initFeatureMap( case GK_GFX8: Features["s-memrealtime"] = true; Features["16-bit-insts"] = true; + Features["dpp"] = true; break; case GK_NONE: diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 1c7ee2e6d4..85ed2b8677 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -8401,6 +8401,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); + case AMDGPU::BI__builtin_amdgcn_mov_dpp: { + llvm::SmallVector Args; + for (unsigned I = 0; I != 5; ++I) + Args.push_back(EmitScalarExpr(E->getArg(I))); + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, + Args[0]->getType()); + return Builder.CreateCall(F, Args); + } case AMDGPU::BI__builtin_amdgcn_div_fixup: case AMDGPU::BI__builtin_amdgcn_div_fixupf: case AMDGPU::BI__builtin_amdgcn_div_fixuph: diff --git a/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/test/CodeGenOpenCL/builtins-amdgcn-vi.cl index 0c88fdfb72..1dad674918 100644 --- a/test/CodeGenOpenCL/builtins-amdgcn-vi.cl +++ b/test/CodeGenOpenCL/builtins-amdgcn-vi.cl @@ -81,3 +81,11 @@ void test_s_memrealtime(global ulong* out) { *out = __builtin_amdgcn_s_memrealtime(); } + +// CHECK-LABEL: @test_mov_dpp +// CHECK: call i32 @llvm.amdgcn.mov.dpp.i32(i32 %src, i32 0, i32 0, i32 0, i1 false) +void test_mov_dpp(global int* out, int src) +{ + *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false); +} + diff --git a/test/CodeGenOpenCL/builtins-amdgcn.cl b/test/CodeGenOpenCL/builtins-amdgcn.cl index e33c3ca526..a19ce2f1ed 100644 --- a/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -235,6 +235,34 @@ void test_ds_swizzle(global int* out, int a) *out = __builtin_amdgcn_ds_swizzle(a, 32); } +// CHECK-LABEL: @test_ds_permute +// CHECK: call i32 @llvm.amdgcn.ds.permute(i32 %a, i32 %b) +void test_ds_permute(global int* out, int a, int b) +{ + out[0] = __builtin_amdgcn_ds_permute(a, b); +} + +// CHECK-LABEL: @test_ds_bpermute +// CHECK: call i32 @llvm.amdgcn.ds.bpermute(i32 %a, i32 %b) +void test_ds_bpermute(global int* out, int a, int b) +{ + *out = __builtin_amdgcn_ds_bpermute(a, b); +} + +// CHECK-LABEL: @test_readfirstlane +// CHECK: call i32 @llvm.amdgcn.readfirstlane(i32 %a) +void test_readfirstlane(global int* out, int a) +{ + *out = __builtin_amdgcn_readfirstlane(a); +} + +// CHECK-LABEL: @test_readlane +// CHECK: call i32 @llvm.amdgcn.readlane(i32 %a, i32 %b) +void test_readlane(global int* out, int a, int b) +{ + *out = __builtin_amdgcn_readlane(a, b); +} + // CHECK-LABEL: @test_fcmp_f32 // CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 5) void test_fcmp_f32(global ulong* out, float a, float b) diff --git a/test/SemaOpenCL/builtins-amdgcn-error.cl b/test/SemaOpenCL/builtins-amdgcn-error.cl index 6cb2f38822..2639bf2775 100644 --- a/test/SemaOpenCL/builtins-amdgcn-error.cl +++ b/test/SemaOpenCL/builtins-amdgcn-error.cl @@ -1,16 +1,17 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s -// FIXME: We only get one error if the functions are the other order in the -// file. - #pragma OPENCL EXTENSION cl_khr_fp64 : enable typedef unsigned long ulong; typedef unsigned int uint; -ulong test_s_memrealtime() +// To get all errors for feature checking we need to put them in one function +// since Clang will stop codegen for the next function if it finds error during +// codegen of the previous function. +void test_target_builtin(global int* out, int a) { - return __builtin_amdgcn_s_memrealtime(); // expected-error {{'__builtin_amdgcn_s_memrealtime' needs target feature s-memrealtime}} + __builtin_amdgcn_s_memrealtime(); // expected-error {{'__builtin_amdgcn_s_memrealtime' needs target feature s-memrealtime}} + *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, false); // expected-error {{'__builtin_amdgcn_mov_dpp' needs target feature dpp}} } void test_s_sleep(int x) @@ -92,3 +93,12 @@ void test_s_getreg(global int* out, int a) { *out = __builtin_amdgcn_s_getreg(a); // expected-error {{argument to '__builtin_amdgcn_s_getreg' must be a constant integer}} } + +void test_mov_dpp2(global int* out, int a, int b, int c, int d, bool e) +{ + *out = __builtin_amdgcn_mov_dpp(a, b, 0, 0, false); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}} + *out = __builtin_amdgcn_mov_dpp(a, 0, c, 0, false); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}} + *out = __builtin_amdgcn_mov_dpp(a, 0, 0, d, false); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}} + *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}} +} +