From: Stanislav Mekhanoshin Date: Thu, 6 Apr 2017 18:15:44 +0000 (+0000) Subject: [AMDGPU] Translate reqd_work_group_size into amdgpu_flat_work_group_size X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4f684ad2b7602c4caaf6183525e04fb1eecec6d1;p=clang [AMDGPU] Translate reqd_work_group_size into amdgpu_flat_work_group_size These two attributes specify the same info in a different way. AMGPU BE only checks the latter as a target specific attribute as opposed to language specific reqd_work_group_size. This change produces amdgpu_flat_work_group_size out of reqd_work_group_size if specified. Differential Revision: https://reviews.llvm.org/D31728 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@299678 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index fadbcea9b0..94c3880ea2 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -7302,9 +7302,14 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( llvm::Function *F = cast(GV); - if (const auto *Attr = FD->getAttr()) { - unsigned Min = Attr->getMin(); - unsigned Max = Attr->getMax(); + const auto *ReqdWGS = M.getLangOpts().OpenCL ? + FD->getAttr() : nullptr; + const auto *FlatWGS = FD->getAttr(); + if (ReqdWGS || FlatWGS) { + unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; + unsigned Max = FlatWGS ? FlatWGS->getMax() : 0; + if (ReqdWGS && Min == 0 && Max == 0) + Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); if (Min != 0) { assert(Min <= Max && "Min must be less than or equal Max"); diff --git a/test/CodeGenOpenCL/amdgpu-attrs.cl b/test/CodeGenOpenCL/amdgpu-attrs.cl index 4ca85d3161..c914f2e651 100644 --- a/test/CodeGenOpenCL/amdgpu-attrs.cl +++ b/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -129,6 +129,16 @@ kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64( // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]] } +__attribute__((reqd_work_group_size(32, 2, 1))) // expected-no-diagnostics +kernel void reqd_work_group_size_32_2_1() { +// CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1() [[FLAT_WORK_GROUP_SIZE_64_64:#[0-9]+]] +} +__attribute__((reqd_work_group_size(32, 2, 1), amdgpu_flat_work_group_size(16, 128))) // expected-no-diagnostics +kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() { +// CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]] +} + + // Make sure this is silently accepted on other targets. // X86-NOT: "amdgpu-flat-work-group-size" // X86-NOT: "amdgpu-waves-per-eu" @@ -142,6 +152,8 @@ kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64( // CHECK-NOT: "amdgpu-num-vgpr"="0" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="64,64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { noinline nounwind "amdgpu-flat-work-group-size"="16,128" // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-waves-per-eu"="2,4" // CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32"