[OPENMP 4.1] Improved codegen for 'uval' qualifier of 'linear' clause.

author Alexey Bataev <a.bataev@hotmail.com>

Fri, 21 Aug 2015 06:41:23 +0000 (06:41 +0000)

committer Alexey Bataev <a.bataev@hotmail.com>

Fri, 21 Aug 2015 06:41:23 +0000 (06:41 +0000)
author Alexey Bataev <a.bataev@hotmail.com>
Fri, 21 Aug 2015 06:41:23 +0000 (06:41 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Fri, 21 Aug 2015 06:41:23 +0000 (06:41 +0000)
diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp

index d2653b7cb905fb70d0c3fff9d1927d1e994e8764..5d86cc23f007ecd41a5783eb8b76c627f94b7de1 100644 (file)
--- a/lib/Sema/SemaOpenMP.cpp
+++ b/lib/Sema/SemaOpenMP.cpp
@@ -6423,7 +6423,12 @@ OMPClause *Sema::ActOnOpenMPLinearClause(
          *this, Private, DE->getType().getUnqualifiedType(), DE->getExprLoc());
      // Build var to save initial value.
      VarDecl *Init = buildVarDecl(*this, ELoc, QType, ".linear.start");
-    AddInitializerToDecl(Init, DefaultLvalueConversion(DE).get(),
+    Expr *InitExpr;
+    if (LinKind == OMPC_LINEAR_uval)
+      InitExpr = VD->getInit();
+    else
+      InitExpr = DE;
+    AddInitializerToDecl(Init, DefaultLvalueConversion(InitExpr).get(),
                           /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
      auto InitRef = buildDeclRefExpr(
          *this, Init, DE->getType().getUnqualifiedType(), DE->getExprLoc());
@@ -6491,15 +6496,20 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
    bool HasErrors = false;
    auto CurInit = Clause.inits().begin();
    auto CurPrivate = Clause.privates().begin();
+  auto LinKind = Clause.getModifier();
    for (auto &RefExpr : Clause.varlists()) {
      Expr *InitExpr = *CurInit;
  
      // Build privatized reference to the current linear var.
      auto DE = cast<DeclRefExpr>(RefExpr);
-    auto CapturedRef =
-        buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()),
-                         DE->getType().getUnqualifiedType(), DE->getExprLoc(),
-                         /*RefersToCapture=*/true);
+    Expr *CapturedRef;
+    if (LinKind == OMPC_LINEAR_uval)
+      CapturedRef = cast<VarDecl>(DE->getDecl())->getInit();
+    else
+      CapturedRef =
+          buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()),
+                           DE->getType().getUnqualifiedType(), DE->getExprLoc(),
+                           /*RefersToCapture=*/true);
  
      // Build update: Var = InitExpr + IV * Step
      ExprResult Update =
diff --git a/test/OpenMP/simd_codegen.cpp b/test/OpenMP/simd_codegen.cpp

index e67ad5d4c7e07323780ac8ce36ef1812fe0946a1..bea4647927410237f72972ef4fd1f2c7caba061c 100644 (file)
--- a/test/OpenMP/simd_codegen.cpp
+++ b/test/OpenMP/simd_codegen.cpp
@@ -481,6 +481,135 @@ void widened(float *a, float *b, float *c, float *d) {
  // CHECK: ret void
  }
  
+// CHECK-LABEL: define {{.*void}} @{{.*}}linear{{.*}}(float* {{.+}})
+void linear(float *a) {
+  // CHECK: [[VAL_ADDR:%.+]] = alloca i64,
+  // CHECK: [[K_ADDR:%.+]] = alloca i64*,
+  long long val = 0;
+  long long &k = val;
+
+  #pragma omp simd linear(k : 3)
+// CHECK: store i64* [[VAL_ADDR]], i64** [[K_ADDR]],
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP_END]]
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
+//
+
+  #pragma omp simd linear(val(k) : 3)
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP_END]]
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
+//
+  #pragma omp simd linear(uval(k) : 3)
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[VAL_ADDR]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP_END]]
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[VAL_ADDR]]
+//
+}
+
  // TERM_DEBUG-LABEL: bar
  int bar() {return 0;};
author	Alexey Bataev <a.bataev@hotmail.com>
	Fri, 21 Aug 2015 06:41:23 +0000 (06:41 +0000)
committer	Alexey Bataev <a.bataev@hotmail.com>
	Fri, 21 Aug 2015 06:41:23 +0000 (06:41 +0000)
lib/Sema/SemaOpenMP.cpp		patch \| blob \| history
test/OpenMP/simd_codegen.cpp		patch \| blob \| history