[LV] Consider non-consecutive but vectorizable accesses for VF selection

author Matthew Simpson <mssimpso@codeaurora.org>

Thu, 2 Mar 2017 13:55:05 +0000 (13:55 +0000)

committer Matthew Simpson <mssimpso@codeaurora.org>

Thu, 2 Mar 2017 13:55:05 +0000 (13:55 +0000)
author Matthew Simpson <mssimpso@codeaurora.org>
Thu, 2 Mar 2017 13:55:05 +0000 (13:55 +0000)
committer Matthew Simpson <mssimpso@codeaurora.org>
Thu, 2 Mar 2017 13:55:05 +0000 (13:55 +0000)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 36570b49bb0b385d05a577d8b382e89d92aa8991..763ce083837753f0fbaadd402f0b8f23052fb729 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6326,9 +6326,16 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
          T = ST->getValueOperand()->getType();
  
        // Ignore loaded pointer types and stored pointer types that are not
-      // consecutive. However, we do want to take consecutive stores/loads of
-      // pointer vectors into account.
-      if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I))
+      // vectorizable.
+      //
+      // FIXME: The check here attempts to predict whether a load or store will
+      //        be vectorized. We only know this for certain after a VF has
+      //        been selected. Here, we assume that if an access can be
+      //        vectorized, it will be. We should also look at extending this
+      //        optimization to non-pointer types.
+      //
+      if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) &&
+          !Legal->isAccessInterleaved(&I) && !Legal->isLegalGatherOrScatter(&I))
          continue;
  
        MinWidth = std::min(MinWidth,
diff --git a/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll b/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll

new file mode 100644 (file)

index 0000000..1ae7dad
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
@@ -0,0 +1,33 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: Checking a loop in "interleaved_access"
+; CHECK:         The Smallest and Widest types: 64 / 64 bits
+;
+define void @interleaved_access(i8** %A, i64 %N) {
+for.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next.3, %for.body ], [ 0, %for.ph ]
+  %tmp0 = getelementptr inbounds i8*, i8** %A, i64 %i
+  store i8* null, i8** %tmp0, align 8
+  %i.next.0 = add nuw nsw i64 %i, 1
+  %tmp1 = getelementptr inbounds i8*, i8** %A, i64 %i.next.0
+  store i8* null, i8** %tmp1, align 8
+  %i.next.1 = add nsw i64 %i, 2
+  %tmp2 = getelementptr inbounds i8*, i8** %A, i64 %i.next.1
+  store i8* null, i8** %tmp2, align 8
+  %i.next.2 = add nsw i64 %i, 3
+  %tmp3 = getelementptr inbounds i8*, i8** %A, i64 %i.next.2
+  store i8* null, i8** %tmp3, align 8
+  %i.next.3 = add nsw i64 %i, 4
+  %cond = icmp slt i64 %i.next.3, %N
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
author	Matthew Simpson <mssimpso@codeaurora.org>
	Thu, 2 Mar 2017 13:55:05 +0000 (13:55 +0000)
committer	Matthew Simpson <mssimpso@codeaurora.org>
	Thu, 2 Mar 2017 13:55:05 +0000 (13:55 +0000)
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll	[new file with mode: 0644]	patch \| blob