[AMDGPU] Translate reqd_work_group_size into amdgpu_flat_work_group_size

author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>

Thu, 6 Apr 2017 18:15:44 +0000 (18:15 +0000)

committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>

Thu, 6 Apr 2017 18:15:44 +0000 (18:15 +0000)
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Thu, 6 Apr 2017 18:15:44 +0000 (18:15 +0000)
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Thu, 6 Apr 2017 18:15:44 +0000 (18:15 +0000)
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp

index fadbcea9b03de87f75591cc5f8d1cff59ba5a9a7..94c3880ea26ecee75bd1d895639491dc3609331e 100644 (file)
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -7302,9 +7302,14 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
  
    llvm::Function *F = cast<llvm::Function>(GV);
  
-  if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
-    unsigned Min = Attr->getMin();
-    unsigned Max = Attr->getMax();
+  const auto *ReqdWGS = M.getLangOpts().OpenCL ?
+    FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
+  const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
+  if (ReqdWGS || FlatWGS) {
+    unsigned Min = FlatWGS ? FlatWGS->getMin() : 0;
+    unsigned Max = FlatWGS ? FlatWGS->getMax() : 0;
+    if (ReqdWGS && Min == 0 && Max == 0)
+      Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
  
      if (Min != 0) {
        assert(Min <= Max && "Min must be less than or equal Max");
diff --git a/test/CodeGenOpenCL/amdgpu-attrs.cl b/test/CodeGenOpenCL/amdgpu-attrs.cl

index 4ca85d316104ee5d853f4b50278b9a0e3db834ed..c914f2e6514f5dab9684200375894544797f698c 100644 (file)
--- a/test/CodeGenOpenCL/amdgpu-attrs.cl
+++ b/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -129,6 +129,16 @@ kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64(
  // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
  }
  
+__attribute__((reqd_work_group_size(32, 2, 1))) // expected-no-diagnostics
+kernel void reqd_work_group_size_32_2_1() {
+// CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1() [[FLAT_WORK_GROUP_SIZE_64_64:#[0-9]+]]
+}
+__attribute__((reqd_work_group_size(32, 2, 1), amdgpu_flat_work_group_size(16, 128))) // expected-no-diagnostics
+kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
+// CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
+}
+
+
  // Make sure this is silently accepted on other targets.
  // X86-NOT: "amdgpu-flat-work-group-size"
  // X86-NOT: "amdgpu-waves-per-eu"
@@ -142,6 +152,8 @@ kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64(
  // CHECK-NOT: "amdgpu-num-vgpr"="0"
  
  // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="64,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { noinline nounwind "amdgpu-flat-work-group-size"="16,128"
  // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-waves-per-eu"="2"
  // CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-waves-per-eu"="2,4"
  // CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32"
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
	Thu, 6 Apr 2017 18:15:44 +0000 (18:15 +0000)
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
	Thu, 6 Apr 2017 18:15:44 +0000 (18:15 +0000)
lib/CodeGen/TargetInfo.cpp		patch | blob | history
test/CodeGenOpenCL/amdgpu-attrs.cl		patch | blob | history