OpenCL: Assume functions are convergent

author Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 6 Oct 2017 19:34:40 +0000 (19:34 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 6 Oct 2017 19:34:40 +0000 (19:34 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 6 Oct 2017 19:34:40 +0000 (19:34 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 6 Oct 2017 19:34:40 +0000 (19:34 +0000)
diff --git a/include/clang/Basic/LangOptions.h b/include/clang/Basic/LangOptions.h

index 8488515d2b6775068a842b095a5a2a1e0e5aa695..51fb5ad66120b8ba225f4e3a31a0900e05383235 100644 (file)
--- a/include/clang/Basic/LangOptions.h
+++ b/include/clang/Basic/LangOptions.h
@@ -197,6 +197,10 @@ public:
    bool allowsNonTrivialObjCLifetimeQualifiers() const {
      return ObjCAutoRefCount || ObjCWeak;
    }
+
+  bool assumeFunctionsAreConvergent() const {
+    return (CUDA && CUDAIsDevice) || OpenCL;
+  }
  };
  
  /// \brief Floating point control options
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp

index a432c3ca20d1bf06f59efc93dd28343d671f3d37..d022c99711549e20569e7b17e09a8be295dca0d9 100644 (file)
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -1750,13 +1750,16 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
        FuncAttrs.addAttribute("backchain");
    }
  
-  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
-    // Conservatively, mark all functions and calls in CUDA as convergent
-    // (meaning, they may call an intrinsically convergent op, such as
-    // __syncthreads(), and so can't have certain optimizations applied around
-    // them).  LLVM will remove this attribute where it safely can.
+  if (getLangOpts().assumeFunctionsAreConvergent()) {
+    // Conservatively, mark all functions and calls in CUDA and OpenCL as
+    // convergent (meaning, they may call an intrinsically convergent op, such
+    // as __syncthreads() / barrier(), and so can't have certain optimizations
+    // applied around them).  LLVM will remove this attribute where it safely
+    // can.
      FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+  }
  
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
      // Exceptions aren't supported in CUDA device code.
      FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
  
diff --git a/test/CodeGenOpenCL/amdgpu-attrs.cl b/test/CodeGenOpenCL/amdgpu-attrs.cl

index 230e0948f8cc81a7b97d6124f8b6c65f4c4fea88..2696123f1434fe0bb6f989d017be051a2f89d613 100644 (file)
--- a/test/CodeGenOpenCL/amdgpu-attrs.cl
+++ b/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -151,28 +151,28 @@ kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
  // CHECK-NOT: "amdgpu-num-sgpr"="0"
  // CHECK-NOT: "amdgpu-num-vgpr"="0"
  
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64"
-
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
-
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
diff --git a/test/CodeGenOpenCL/convergent.cl b/test/CodeGenOpenCL/convergent.cl

index c6bcb52a6d75055d4aeb46992575be616a62ebc3..c46205bb9b12df0acef3bdb0825a565ae1c32677 100644 (file)
--- a/test/CodeGenOpenCL/convergent.cl
+++ b/test/CodeGenOpenCL/convergent.cl
@@ -1,9 +1,19 @@
-// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck %s
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s
+
+// This is initially assumed convergent, but can be deduced to not require it.
+
+// CHECK-LABEL: define spir_func void @non_convfun() local_unnamed_addr #0
+// CHECK: ret void
+__attribute__((noinline))
+void non_convfun(void) {
+  volatile int* p;
+  *p = 0;
+}
  
  void convfun(void) __attribute__((convergent));
-void non_convfun(void);
  void nodupfun(void) __attribute__((noduplicate));
  
+// External functions should be assumed convergent.
  void f(void);
  void g(void);
  
@@ -17,19 +27,23 @@ void g(void);
  //      non_convfun();
  //    }
  //
-// CHECK: define spir_func void @test_merge_if(i32 %[[a:.+]])
-// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK-LABEL: define spir_func void @test_merge_if(i32 %a) local_unnamed_addr #1 {
+// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
  // CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]]
+
  // CHECK: [[if_then]]:
  // CHECK: tail call spir_func void @f()
  // CHECK: tail call spir_func void @non_convfun()
  // CHECK: tail call spir_func void @g()
+
  // CHECK: br label %[[if_end3:.+]]
+
  // CHECK: [[if_end3_critedge]]:
  // CHECK: tail call spir_func void @non_convfun()
  // CHECK: br label %[[if_end3]]
+
  // CHECK: [[if_end3]]:
-// CHECK-LABEL: ret void
+// CHECK: ret void
  
  void test_merge_if(int a) {
    if (a) {
@@ -41,13 +55,13 @@ void test_merge_if(int a) {
    }
  }
  
-// CHECK-DAG: declare spir_func void @f()
-// CHECK-DAG: declare spir_func void @non_convfun()
-// CHECK-DAG: declare spir_func void @g()
+// CHECK-DAG: declare spir_func void @f() local_unnamed_addr #2
+// CHECK-DAG: declare spir_func void @g() local_unnamed_addr #2
+
  
  // Test two if's are not merged.
-// CHECK: define spir_func void @test_no_merge_if(i32 %[[a:.+]])
-// CHECK:  %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK-LABEL: define spir_func void @test_no_merge_if(i32 %a) local_unnamed_addr #1
+// CHECK:  %[[tobool:.+]] = icmp eq i32 %a, 0
  // CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]]
  // CHECK: [[if_then]]:
  // CHECK: tail call spir_func void @f()
@@ -56,7 +70,7 @@ void test_merge_if(int a) {
  // CHECK: br label %[[if_end]]
  // CHECK: [[if_end]]:
  // CHECK:  %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ]
-// CHECK:  tail call spir_func void @convfun() #[[attr5:.+]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4:.+]]
  // CHECK:  br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]]
  // CHECK: [[if_then2]]:
  // CHECK: tail call spir_func void @g()
@@ -74,20 +88,20 @@ void test_no_merge_if(int a) {
    }
  }
  
-// CHECK: declare spir_func void @convfun(){{[^#]*}} #[[attr2:[0-9]+]]
+// CHECK: declare spir_func void @convfun(){{[^#]*}} #2
  
  // Test loop is unrolled for convergent function.
-// CHECK-LABEL: define spir_func void @test_unroll()
-// CHECK:  tail call spir_func void @convfun() #[[attr5:[0-9]+]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
+// CHECK-LABEL: define spir_func void @test_unroll() local_unnamed_addr #1
+// CHECK:  tail call spir_func void @convfun() #[[attr4:[0-9]+]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
  // CHECK-LABEL:  ret void
  
  void test_unroll() {
@@ -101,7 +115,7 @@ void test_unroll() {
  // CHECK: [[for_cond_cleanup:.+]]:
  // CHECK:  ret void
  // CHECK: [[for_body]]:
-// CHECK:  tail call spir_func void @nodupfun() #[[attr6:[0-9]+]]
+// CHECK:  tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
  // CHECK-NOT: call spir_func void @nodupfun()
  // CHECK:  br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
  
@@ -112,7 +126,9 @@ void test_not_unroll() {
  
  // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
  
-// CHECK-DAG: attributes #[[attr2]] = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr3]] = { {{[^}]*}}noduplicate{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr5]] = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr6]] = { {{[^}]*}}noduplicate{{[^}]*}} }
+// CHECK: attributes #0 = { noinline norecurse nounwind "
+// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
+// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 6 Oct 2017 19:34:40 +0000 (19:34 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 6 Oct 2017 19:34:40 +0000 (19:34 +0000)
include/clang/Basic/LangOptions.h		patch \| blob \| history
lib/CodeGen/CGCall.cpp		patch \| blob \| history
test/CodeGenOpenCL/amdgpu-attrs.cl		patch \| blob \| history
test/CodeGenOpenCL/convergent.cl		patch \| blob \| history