[LV] Report multiple reasons for not vectorizing under allowExtraAnalysis

author Ayal Zaks <ayal.zaks@intel.com>

Tue, 23 May 2017 07:08:02 +0000 (07:08 +0000)

committer Ayal Zaks <ayal.zaks@intel.com>

Tue, 23 May 2017 07:08:02 +0000 (07:08 +0000)
author Ayal Zaks <ayal.zaks@intel.com>
Tue, 23 May 2017 07:08:02 +0000 (07:08 +0000)
committer Ayal Zaks <ayal.zaks@intel.com>
Tue, 23 May 2017 07:08:02 +0000 (07:08 +0000)
diff --git a/docs/Vectorizers.rst b/docs/Vectorizers.rst

index 65c19aa2bc0cbfa4f7c0c9f88a91c2b2779a7368..a909d458c3176e3bc5ffc606a0904385fd86fca6 100644 (file)
--- a/docs/Vectorizers.rst
+++ b/docs/Vectorizers.rst
@@ -99,7 +99,9 @@ Optimization remarks are enabled using:
  indicates if vectorization was specified.
  
  ``-Rpass-analysis=loop-vectorize`` identifies the statements that caused
-vectorization to fail.
+vectorization to fail. If in addition ``-fsave-optimization-record`` is
+provided, multiple causes of vectorization failure may be listed (this behavior
+might change in the future).
  
  Consider the following loop:
  
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 1dc554bede7e36d87bb5b69381a68b86666a7ed3..fa8b613f9349cb47173b168c866baf62e72f1e1f 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5086,12 +5086,18 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  }
  
  bool LoopVectorizationLegality::canVectorize() {
+  // Store the result and return it at the end instead of exiting early, in case
+  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
+  bool Result = true;
    // We must have a loop in canonical form. Loops with indirectbr in them cannot
    // be canonicalized.
    if (!TheLoop->getLoopPreheader()) {
      ORE->emit(createMissedAnalysis("CFGNotUnderstood")
                << "loop control flow is not understood by vectorizer");
-    return false;
+    if (ORE->allowExtraAnalysis())
+      Result = false;
+    else
+      return false;
    }
  
    // FIXME: The code is currently dead, since the loop gets sent to
@@ -5101,21 +5107,30 @@ bool LoopVectorizationLegality::canVectorize() {
    if (!TheLoop->empty()) {
      ORE->emit(createMissedAnalysis("NotInnermostLoop")
                << "loop is not the innermost loop");
-    return false;
+    if (ORE->allowExtraAnalysis())
+      Result = false;
+    else
+      return false;
    }
  
    // We must have a single backedge.
    if (TheLoop->getNumBackEdges() != 1) {
      ORE->emit(createMissedAnalysis("CFGNotUnderstood")
                << "loop control flow is not understood by vectorizer");
-    return false;
+    if (ORE->allowExtraAnalysis())
+      Result = false;
+    else
+      return false;
    }
  
    // We must have a single exiting block.
    if (!TheLoop->getExitingBlock()) {
      ORE->emit(createMissedAnalysis("CFGNotUnderstood")
                << "loop control flow is not understood by vectorizer");
-    return false;
+    if (ORE->allowExtraAnalysis())
+      Result = false;
+    else
+      return false;
    }
  
    // We only handle bottom-tested loops, i.e. loop in which the condition is
@@ -5124,7 +5139,10 @@ bool LoopVectorizationLegality::canVectorize() {
    if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
      ORE->emit(createMissedAnalysis("CFGNotUnderstood")
                << "loop control flow is not understood by vectorizer");
-    return false;
+    if (ORE->allowExtraAnalysis())
+      Result = false;
+    else
+      return false;
    }
  
    // We need to have a loop header.
@@ -5135,28 +5153,28 @@ bool LoopVectorizationLegality::canVectorize() {
    unsigned NumBlocks = TheLoop->getNumBlocks();
    if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
      DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
-    return false;
-  }
-
-  // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = PSE.getBackedgeTakenCount();
-  if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
-    ORE->emit(createMissedAnalysis("CantComputeNumberOfIterations")
-              << "could not determine number of loop iterations");
-    DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
-    return false;
+    if (ORE->allowExtraAnalysis())
+      Result = false;
+    else
+      return false;
    }
  
    // Check if we can vectorize the instructions and CFG in this loop.
    if (!canVectorizeInstrs()) {
      DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
-    return false;
+    if (ORE->allowExtraAnalysis())
+      Result = false;
+    else
+      return false;
    }
  
    // Go over each instruction and look at memory deps.
    if (!canVectorizeMemory()) {
      DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
-    return false;
+    if (ORE->allowExtraAnalysis())
+      Result = false;
+    else
+      return false;
    }
  
    DEBUG(dbgs() << "LV: We can vectorize this loop"
@@ -5184,13 +5202,17 @@ bool LoopVectorizationLegality::canVectorize() {
                << "Too many SCEV assumptions need to be made and checked "
                << "at runtime");
      DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
-    return false;
+    if (ORE->allowExtraAnalysis())
+      Result = false;
+    else
+      return false;
    }
  
-  // Okay! We can vectorize. At this point we don't have any other mem analysis
+  // Okay! We've done all the tests. If any have failed, return false. Otherwise
+  // we can vectorize, and at this point we don't have any other mem analysis
    // which may limit our maximum vectorization factor, so just return true with
    // no restrictions.
-  return true;
+  return Result;
  }
  
  static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll

index b2933c4b56f204a535e423e9607ff5ac46586f50..4dc62d86453f7c65b66b5511aaa92f47253355e3 100644 (file)
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -11,38 +11,38 @@
  ;       break;
  ;   }
  ; }
+; File, line, and column should match those specified in the metadata
+; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:4:5: loop not vectorized
  
  ; void test_disabled(int *A, int Length) {
  ; #pragma clang loop vectorize(disable) interleave(disable)
  ;   for (int i = 0; i < Length; i++)
  ;     A[i] = i;
  ; }
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
  
  ; void test_array_bounds(int *A, int *B, int Length) {
  ; #pragma clang loop vectorize(enable)
  ;   for (int i = 0; i < Length; i++)
  ;     A[i] = A[B[i]];
  ; }
-
-; File, line, and column should match those specified in the metadata
-; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
-; CHECK: remark: source.cpp:4:5: loop not vectorized
-; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
  ; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
  ; CHECK: remark: source.cpp:19:5: loop not vectorized
  ; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
  
-; CHECK: _Z4testPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z13test_disabledPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z17test_array_boundsPiS_i
-; CHECK-NOT: x i32>
-; CHECK: ret
+; int foo();
+; int test_multiple_failures(int *A) {
+;   int k = 0;
+; #pragma clang loop vectorize(enable) interleave(enable)
+;   for (int i = 0; i < 1000; i+=A[i]) {
+;     if (A[i])
+;       k = foo();
+;   }
+;   return k;
+; }
+; CHECK: remark: source.cpp:29:7: loop not vectorized: control flow cannot be substituted for a select
+; CHECK: remark: source.cpp:27:3: loop not vectorized
  
  ; YAML:       --- !Analysis
  ; YAML-NEXT: Pass:            loop-vectorize
@@ -98,6 +98,41 @@
  ; YAML-NEXT:   - String:          'loop not vectorized: '
  ; YAML-NEXT:   - String:          failed explicitly specified loop vectorization
  ; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            NoCFGForSelect
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 29, Column: 7 }
+; YAML-NEXT: Function:        test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: '
+; YAML-NEXT:   - String:          control flow cannot be substituted for a select
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            NonReductionValueUsedOutsideLoop
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function:        test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: '
+; YAML-NEXT:   - String:          value that could not be identified as reduction is used outside the loop
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            CantComputeNumberOfIterations
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function:        test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: '
+; YAML-NEXT:   - String:          could not determine number of loop iterations
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Missed
+; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            MissedDetails
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function:        test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          loop not vectorized
+; YAML-NEXT: ...
  
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  
@@ -124,6 +159,10 @@ for.end:                                          ; preds = %for.body, %entry
    ret void, !dbg !24
  }
  
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
  ; Function Attrs: nounwind optsize ssp uwtable
  define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 {
  entry:
@@ -144,6 +183,10 @@ for.end:                                          ; preds = %for.body, %entry
    ret void, !dbg !31
  }
  
+; CHECK: _Z13test_disabledPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
  ; Function Attrs: nounwind optsize ssp uwtable
  define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 {
  entry:
@@ -174,6 +217,45 @@ for.end:                                          ; preds = %for.end.loopexit, %
    ret void, !dbg !36
  }
  
+; CHECK: _Z17test_array_boundsPiS_i
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; Function Attrs: nounwind uwtable
+define i32 @test_multiple_failures(i32* nocapture readonly %A) #0 !dbg !46 {
+entry:
+  br label %for.body, !dbg !38
+
+for.body:                                         ; preds = %entry, %for.inc
+  %i.09 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+  %k.09 = phi i32 [ 0, %entry ], [ %k.1, %for.inc ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09, !dbg !40
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !40
+  %tobool = icmp eq i32 %0, 0, !dbg !40
+  br i1 %tobool, label %for.inc, label %if.then, !dbg !40
+
+if.then:                                          ; preds = %for.body
+  %call = tail call i32 (...) @foo(), !dbg !41
+  %.pre = load i32, i32* %arrayidx, align 4
+  br label %for.inc, !dbg !42
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %1 = phi i32 [ %.pre, %if.then ], [ 0, %for.body ], !dbg !43
+  %k.1 = phi i32 [ %call, %if.then ], [ %k.09, %for.body ]
+  %add = add nsw i32 %1, %i.09, !dbg !44
+  %cmp = icmp slt i32 %add, 1000, !dbg !45
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !38
+
+for.cond.cleanup:                                 ; preds = %for.inc
+  ret i32 %k.1, !dbg !39
+}
+
+declare i32 @foo(...)
+
+; CHECK: test_multiple_failure
+; CHECK-NOT: x i32>
+; CHECK: ret
+
  attributes #0 = { nounwind }
  
  !llvm.dbg.cu = !{!0}
@@ -216,3 +298,13 @@ attributes #0 = { nounwind }
  !34 = !{!34, !15}
  !35 = !DILocation(line: 19, column: 5, scope: !33)
  !36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, variables: !2)
author	Ayal Zaks <ayal.zaks@intel.com>
	Tue, 23 May 2017 07:08:02 +0000 (07:08 +0000)
committer	Ayal Zaks <ayal.zaks@intel.com>
	Tue, 23 May 2017 07:08:02 +0000 (07:08 +0000)
docs/Vectorizers.rst		patch \| blob \| history
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll		patch \| blob \| history