Summary:
Emit !llvm.mem.parallel_loop_access metadata for memory accesses even if the parallel loop is not at the top of the loop stack.
Fixes llvm.org/PR37558.
Reviewers: ABataev, hfinkel, amusman, tyler.nowicki
Reviewed By: hfinkel
Subscribers: Meinersbur, hfinkel, cfe-commits
Differential Revision: https://reviews.llvm.org/D48808
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@338810 91177308-0d34-0410-b5e6-96231b3b80d8
return;
}
- if (L.getAttributes().IsParallel && I->mayReadOrWriteMemory())
- I->setMetadata("llvm.mem.parallel_loop_access", L.getLoopID());
+ if (I->mayReadOrWriteMemory()) {
+ SmallVector<Metadata *, 2> ParallelLoopIDs;
+ for (const LoopInfo &AL : Active)
+ if (AL.getAttributes().IsParallel)
+ ParallelLoopIDs.push_back(AL.getLoopID());
+
+ MDNode *ParallelMD = nullptr;
+ if (ParallelLoopIDs.size() == 1)
+ ParallelMD = cast<MDNode>(ParallelLoopIDs[0]);
+ else if (ParallelLoopIDs.size() >= 2)
+ ParallelMD = MDNode::get(I->getContext(), ParallelLoopIDs);
+ I->setMetadata("llvm.mem.parallel_loop_access", ParallelMD);
+ }
}
--- /dev/null
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
+
+// Verify that the inner access is tagged with a parallel_loop_access
+// for the inner and outer loop using a list.
+void vectorize_nested_test(int *List, int Length) {
+#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
+ for (int i = 0; i < Length; ++i) { // Outer loop, annotated vectorize(assume_safety).
+#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
+ for (int j = 0; j < Length; ++j) // Inner loop, carrying the same annotation.
+ List[i * Length + j] = (i + j) * 2; // Store that the CHECK lines expect to list both loop IDs.
+ }
+}
+
+// CHECK: %[[MUL:.+]] = mul
+// CHECK: store i32 %[[MUL]], i32* %{{.+}}, !llvm.mem.parallel_loop_access ![[PARALLEL_LIST:[0-9]+]]
+// CHECK: br label %{{.+}}, !llvm.loop ![[INNER_LOOPID:[0-9]+]]
+// CHECK: br label %{{.+}}, !llvm.loop ![[OUTER_LOOPID:[0-9]+]]
+
+// CHECK: ![[OUTER_LOOPID]] = distinct !{![[OUTER_LOOPID]],
+// CHECK: ![[PARALLEL_LIST]] = !{![[OUTER_LOOPID]], ![[INNER_LOOPID]]}
+// CHECK: ![[INNER_LOOPID]] = distinct !{![[INNER_LOOPID]],
--- /dev/null
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
+
+// Verify that the inner access is tagged with a parallel_loop_access
+// for the outer loop.
+void vectorize_outer_test(int *List, int Length) {
+#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
+ for (int i = 0; i < Length; i += 2) { // Outer loop, annotated vectorize(assume_safety).
+#pragma clang loop unroll(full)
+ for (int j = 0; j < 2; j += 1) // Inner loop has an unroll hint only.
+ List[i + j] = (i + j) * 2; // Store that the CHECK lines expect to reference only the outer loop ID.
+ }
+}
+
+// CHECK: %[[MUL:.+]] = mul
+// CHECK: store i32 %[[MUL]], i32* %{{.+}}, !llvm.mem.parallel_loop_access ![[OUTER_LOOPID:[0-9]+]]
+// CHECK: br label %{{.+}}, !llvm.loop ![[INNER_LOOPID:[0-9]+]]
+// CHECK: br label %{{.+}}, !llvm.loop ![[OUTER_LOOPID]]
+
+// CHECK: ![[OUTER_LOOPID]] = distinct !{![[OUTER_LOOPID]],
+// CHECK: ![[INNER_LOOPID]] = distinct !{![[INNER_LOOPID]],