[DebugInfo@O2][LoopVectorize] pr39024: Vectorized code linenos step through loop...

author Orlando Cazalet-Hyams <orlando.hyams@sony.com>

Tue, 7 May 2019 15:37:38 +0000 (15:37 +0000)

committer Orlando Cazalet-Hyams <orlando.hyams@sony.com>

Tue, 7 May 2019 15:37:38 +0000 (15:37 +0000)
author Orlando Cazalet-Hyams <orlando.hyams@sony.com>
Tue, 7 May 2019 15:37:38 +0000 (15:37 +0000)
committer Orlando Cazalet-Hyams <orlando.hyams@sony.com>
Tue, 7 May 2019 15:37:38 +0000 (15:37 +0000)
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp

index aa7b933022c00ddbf0b8bffefc70b1f3778f65b5..a04092e54a9b626d1dc4660fff7d72af8e42dca2 100644 (file)
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -566,7 +566,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
  
    // The new block unconditionally branches to the old block.
    BranchInst *BI = BranchInst::Create(BB, NewBB);
-  BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
+  // Splitting the predecessors of a loop header creates a preheader block.
+  if (LI && LI->isLoopHeader(BB))
+    // Using the loop start line number prevents debuggers stepping into the
+    // loop body for this instruction.
+    BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
+  else
+    BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
  
    // Move the edges from Preds to point to NewBB instead of BB.
    for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 4f043448e4eb4913209ab98da323d7ebe3243f61..806cd52cbc2bd36cc6a2bb6f72618fb53352b559 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2894,11 +2894,11 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
          CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
                          CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
  
-    // Provide correct stepping behaviour by using the same DebugLoc as the
-    // scalar loop latch branch cmp if it exists.
-    if (CmpInst *ScalarLatchCmp =
-            dyn_cast_or_null<CmpInst>(ScalarLatchBr->getCondition()))
-      cast<Instruction>(CmpN)->setDebugLoc(ScalarLatchCmp->getDebugLoc());
+    // Here we use the same DebugLoc as the scalar loop latch branch instead
+    // of the corresponding compare because they may have ended up with
+    // different line numbers and we want to avoid awkward line stepping while
+    // debugging. E.g. if the compare has a line number inside the loop.
+    cast<Instruction>(CmpN)->setDebugLoc(ScalarLatchBr->getDebugLoc());
    }
  
    BranchInst *BrInst = BranchInst::Create(ExitBlock, ScalarPH, CmpN);
@@ -3631,7 +3631,15 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0);
    unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
-  setDebugLocFromInst(Builder, ReducedPartRdx);
+
+  // The middle block terminator has already been assigned a DebugLoc here (the
+  // OrigLoop's single latch terminator). We want the whole middle block to
+  // appear to execute on this line because: (a) it is all compiler generated,
+  // (b) these instructions are always executed after evaluating the latch
+  // conditional branch, and (c) other passes may add new predecessors which
+  // terminate on this line. This is the easiest way to ensure we don't
+  // accidentally cause an extra step back into the loop while debugging.
+  setDebugLocFromInst(Builder, LoopMiddleBlock->getTerminator());
    for (unsigned Part = 1; Part < UF; ++Part) {
      Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
      if (Op != Instruction::ICmp && Op != Instruction::FCmp)
diff --git a/test/Transforms/LoopSimplify/dbg-loc.ll b/test/Transforms/LoopSimplify/dbg-loc.ll

index efd5e8e71c923b513f3c815827f763b8dcc5d8ec..4bc519338cbfedb3a6b19cffb14fa010f1e1f78b 100644 (file)
--- a/test/Transforms/LoopSimplify/dbg-loc.ll
+++ b/test/Transforms/LoopSimplify/dbg-loc.ll
@@ -72,7 +72,7 @@ eh.resume:                                        ; preds = %catch
  ; Function Attrs: nounwind readnone
  declare void @llvm.dbg.value(metadata, metadata, metadata)
  
-; CHECK-DAG: [[PREHEADER_LOC]] = !DILocation(line: 73, column: 27, scope: !{{[0-9]+}})
+; CHECK-DAG: [[PREHEADER_LOC]] = !DILocation(line: 73, column: 13, scope: !{{[0-9]+}})
  ; CHECK-DAG: [[LOOPEXIT_LOC]] = !DILocation(line: 75, column: 9, scope: !{{[0-9]+}})
  ; CHECK-DAG: [[LPAD_PREHEADER_LOC]] = !DILocation(line: 85, column: 1, scope: !{{[0-9]+}})
  
diff --git a/test/Transforms/LoopSimplify/do-preheader-dbg.ll b/test/Transforms/LoopSimplify/do-preheader-dbg.ll

new file mode 100755 (executable)

index 0000000..7cacc49
--- /dev/null
+++ b/test/Transforms/LoopSimplify/do-preheader-dbg.ll
@@ -0,0 +1,122 @@
+; Confirm that the line number for the do.body.preheader block
+; branch is the start of the loop.
+
+; RUN: opt -simplifycfg -loop-simplify -keep-loops="false" -S <%s | FileCheck %s
+
+; CHECK: do.body.preheader:
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: br label %do.body, !dbg ![[DL:[0-9]+]]
+; CHECK: ![[DL]] = !DILocation(line: 4,
+
+; This IR can be generated by running:
+; clang src.cpp -O2 -g -S -emit-llvm -mllvm -opt-bisect-limit=62 -o -
+;
+; Where  src.cpp contains:
+; int foo(char *Bytes, int Count)
+; {
+;     int Total = 0;
+;     do
+;         Total += Bytes[--Count];
+;     while (Count);
+;     return Total;
+; }
+
+define dso_local i32 @"foo"(i8* nocapture readonly %Bytes, i32 %Count) local_unnamed_addr !dbg !8 {
+entry:
+  %0 = sext i32 %Count to i64, !dbg !10
+  %min.iters.check = icmp ult i32 %Count, 8, !dbg !10
+  br i1 %min.iters.check, label %do.body.preheader, label %vector.ph, !dbg !10
+
+vector.ph:                                        ; preds = %entry
+  %n.vec = and i64 %0, -8, !dbg !10
+  %ind.end = sub nsw i64 %0, %n.vec, !dbg !10
+  br label %vector.body, !dbg !10
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %11, %vector.body ]
+  %vec.phi5 = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
+  %1 = xor i64 %index, -1, !dbg !11
+  %2 = add i64 %1, %0, !dbg !11
+  %3 = getelementptr inbounds i8, i8* %Bytes, i64 %2, !dbg !11
+  %4 = getelementptr inbounds i8, i8* %3, i64 -3, !dbg !11
+  %5 = bitcast i8* %4 to <4 x i8>*, !dbg !11
+  %wide.load = load <4 x i8>, <4 x i8>* %5, align 1, !dbg !11, !tbaa !12
+  %reverse = shufflevector <4 x i8> %wide.load, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, !dbg !11
+  %6 = getelementptr inbounds i8, i8* %3, i64 -4, !dbg !11
+  %7 = getelementptr inbounds i8, i8* %6, i64 -3, !dbg !11
+  %8 = bitcast i8* %7 to <4 x i8>*, !dbg !11
+  %wide.load6 = load <4 x i8>, <4 x i8>* %8, align 1, !dbg !11, !tbaa !12
+  %reverse7 = shufflevector <4 x i8> %wide.load6, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, !dbg !11
+  %9 = sext <4 x i8> %reverse to <4 x i32>, !dbg !11
+  %10 = sext <4 x i8> %reverse7 to <4 x i32>, !dbg !11
+  %11 = add nsw <4 x i32> %vec.phi, %9, !dbg !11
+  %12 = add nsw <4 x i32> %vec.phi5, %10, !dbg !11
+  %index.next = add i64 %index, 8
+  %13 = icmp eq i64 %index.next, %n.vec
+  br i1 %13, label %middle.block, label %vector.body, !llvm.loop !15
+
+middle.block:                                     ; preds = %vector.body
+  %.lcssa12 = phi <4 x i32> [ %11, %vector.body ], !dbg !11
+  %.lcssa = phi <4 x i32> [ %12, %vector.body ], !dbg !11
+  %bin.rdx = add <4 x i32> %.lcssa, %.lcssa12, !dbg !11
+  %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>, !dbg !11
+  %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf, !dbg !11
+  %rdx.shuf9 = shufflevector <4 x i32> %bin.rdx8, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, !dbg !11
+  %bin.rdx10 = add <4 x i32> %bin.rdx8, %rdx.shuf9, !dbg !11
+  %14 = extractelement <4 x i32> %bin.rdx10, i32 0, !dbg !11
+  %cmp.n = icmp eq i64 %n.vec, %0
+  br i1 %cmp.n, label %do.end, label %do.body.preheader, !dbg !10
+
+do.body.preheader:                                ; preds = %middle.block, %entry
+  %indvars.iv.ph = phi i64 [ %0, %entry ], [ %ind.end, %middle.block ]
+  %Total.0.ph = phi i32 [ 0, %entry ], [ %14, %middle.block ]
+  br label %do.body, !dbg !11
+
+do.body:                                          ; preds = %do.body.preheader, %do.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %do.body ], [ %indvars.iv.ph, %do.body.preheader ]
+  %Total.0 = phi i32 [ %add, %do.body ], [ %Total.0.ph, %do.body.preheader ], !dbg !18
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1, !dbg !11
+  %arrayidx = getelementptr inbounds i8, i8* %Bytes, i64 %indvars.iv.next, !dbg !11
+  %15 = load i8, i8* %arrayidx, align 1, !dbg !11, !tbaa !12
+  %conv = sext i8 %15 to i32, !dbg !11
+  %add = add nsw i32 %Total.0, %conv, !dbg !11
+  %16 = icmp eq i64 %indvars.iv.next, 0
+  br i1 %16, label %do.end.loopexit, label %do.body, !dbg !11, !llvm.loop !19
+
+do.end.loopexit:                                  ; preds = %do.body
+  %add.lcssa11 = phi i32 [ %add, %do.body ], !dbg !11
+  br label %do.end, !dbg !21
+
+do.end:                                           ; preds = %do.end.loopexit, %middle.block
+  %add.lcssa = phi i32 [ %14, %middle.block ], [ %add.lcssa11, %do.end.loopexit ], !dbg !11
+  ret i32 %add.lcssa, !dbg !21
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "src2.cpp", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"CodeView", i32 1}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!""}
+!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!9 = !DISubroutineType(types: !2)
+!10 = !DILocation(line: 4, scope: !8)
+!11 = !DILocation(line: 5, scope: !8)
+!12 = !{!13, !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C++ TBAA"}
+!15 = distinct !{!15, !10, !16, !17}
+!16 = !DILocation(line: 6, scope: !8)
+!17 = !{!"llvm.loop.isvectorized", i32 1}
+!18 = !DILocation(line: 0, scope: !8)
+!19 = distinct !{!19, !10, !16, !20, !17}
+!20 = !{!"llvm.loop.unroll.runtime.disable"}
+!21 = !DILocation(line: 7, scope: !8)
diff --git a/test/Transforms/LoopSimplify/for-preheader-dbg.ll b/test/Transforms/LoopSimplify/for-preheader-dbg.ll

new file mode 100755 (executable)

index 0000000..439b720
--- /dev/null
+++ b/test/Transforms/LoopSimplify/for-preheader-dbg.ll
@@ -0,0 +1,102 @@
+; Confirm that the line number for the for.body.preheader block
+; branch is the start of the loop.
+
+; RUN: opt -simplifycfg -loop-simplify -S <%s | FileCheck %s
+;
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label %for.body, !dbg ![[DL:[0-9]+]]
+; CHECK: ![[DL]] = !DILocation(line: 8,
+
+; This IR can be generated by running:
+; clang  src.cpp -O0 -g -S -emit-llvm -Xclang -disable-O0-optnone -o - | \
+; opt -O2 -S -opt-bisect-limit=27 -o -
+;
+; Where  src.cpp contains:
+; int foo(int count, int *bar)
+; {
+;   if (count + 1 > 256)
+;     return 0;
+;
+;   int ret = count;
+;   int tmp;
+;   for (int j = 0; j < count; j++) {
+;     tmp = bar[j];
+;     ret += tmp;
+;   }
+;
+;   return ret;
+; }
+
+define dso_local i32 @"foo"(i32 %count, i32* nocapture readonly %bar) local_unnamed_addr !dbg !8 {
+entry:
+  %cmp = icmp sgt i32 %count, 255, !dbg !16
+  br i1 %cmp, label %return, label %for.cond.preheader, !dbg !16
+
+for.cond.preheader:                               ; preds = %entry
+  %cmp16 = icmp slt i32 0, %count, !dbg !19
+  br i1 %cmp16, label %for.body.lr.ph, label %return.loopexit, !dbg !19
+
+for.body.lr.ph:                                   ; preds = %for.cond.preheader
+  br label %for.body, !dbg !19
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %j.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %ret.07 = phi i32 [ %count, %for.body.lr.ph ], [ %add2, %for.body ]
+  %0 = zext i32 %j.08 to i64, !dbg !22
+  %arrayidx = getelementptr inbounds i32, i32* %bar, i64 %0, !dbg !22
+  %1 = load i32, i32* %arrayidx, align 4, !dbg !22
+  %add2 = add nsw i32 %1, %ret.07, !dbg !27
+  %inc = add nuw nsw i32 %j.08, 1, !dbg !28
+  %cmp1 = icmp slt i32 %inc, %count, !dbg !19
+  br i1 %cmp1, label %for.body, label %for.cond.return.loopexit_crit_edge, !dbg !19, !llvm.loop !29
+
+for.cond.return.loopexit_crit_edge:               ; preds = %for.body
+  %split = phi i32 [ %add2, %for.body ]
+  br label %return.loopexit, !dbg !19
+
+return.loopexit:                                  ; preds = %for.cond.return.loopexit_crit_edge, %for.cond.preheader
+  %ret.0.lcssa = phi i32 [ %split, %for.cond.return.loopexit_crit_edge ], [ %count, %for.cond.preheader ], !dbg !31
+  br label %return, !dbg !32
+
+return:                                           ; preds = %return.loopexit, %entry
+  %retval.0 = phi i32 [ 0, %entry ], [ %ret.0.lcssa, %return.loopexit ], !dbg !31
+  ret i32 %retval.0, !dbg !32
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "src.cpp", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"CodeView", i32 1}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!""}
+!8 = distinct !DISubprogram(name: "foo", linkageName: "?foo@@YAHHPEAH@Z", scope: !1, file: !1, line: 1, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11, !11, !12}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!13 = !DILocalVariable(name: "bar", arg: 2, scope: !8, file: !1, line: 1, type: !12)
+!14 = !DILocation(line: 1, scope: !8)
+!15 = !DILocalVariable(name: "count", arg: 1, scope: !8, file: !1, line: 1, type: !11)
+!16 = !DILocation(line: 3, scope: !8)
+!17 = !DILocalVariable(name: "j", scope: !18, file: !1, line: 8, type: !11)
+!18 = distinct !DILexicalBlock(scope: !8, file: !1, line: 8)
+!19 = !DILocation(line: 8, scope: !18)
+!20 = !DILocalVariable(name: "ret", scope: !8, file: !1, line: 6, type: !11)
+!21 = !DILocation(line: 6, scope: !8)
+!22 = !DILocation(line: 9, scope: !23)
+!23 = distinct !DILexicalBlock(scope: !24, file: !1, line: 8)
+!24 = distinct !DILexicalBlock(scope: !18, file: !1, line: 8)
+!25 = !DILocalVariable(name: "tmp", scope: !8, file: !1, line: 7, type: !11)
+!26 = !DILocation(line: 7, scope: !8)
+!27 = !DILocation(line: 10, scope: !23)
+!28 = !DILocation(line: 8, scope: !24)
+!29 = distinct !{!29, !19, !30}
+!30 = !DILocation(line: 11, scope: !18)
+!31 = !DILocation(line: 0, scope: !8)
+!32 = !DILocation(line: 14, scope: !8)
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll

index fb42443f326b84489ac8aca254d567dce9f128c5..7e4866ced4baff003a04871cb647e21f2b07112b 100644 (file)
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -10,31 +10,29 @@
  ; EPILOG: for.body.preheader:
  ; EPILOG:   br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg [[PH_LOC:![0-9]+]]
  ; EPILOG: for.body:
-; EPILOG:   br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
+; EPILOG:   br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[PH_LOC]]
  ; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
  ; EPILOG: for.body.epil.preheader:
-; EPILOG:   br label %for.body.epil, !dbg [[BODY_LOC]]
+; EPILOG:   br label %for.body.epil, !dbg [[PH_LOC]]
  ; EPILOG: for.body.epil:
-; EPILOG:   br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC]]
+; EPILOG:   br label %for.end.loopexit.epilog-lcssa, !dbg [[PH_LOC]]
  ; EPILOG: for.end.loopexit:
  ; EPILOG:   br label %for.end, !dbg [[EXIT_LOC:![0-9]+]]
  
-; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
-; EPILOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
+; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
  ; EPILOG-DAG: [[EXIT_LOC]] = !DILocation(line: 103, column: 1, scope: !{{.*}})
  
  ; PROLOG: for.body.preheader:
  ; PROLOG:   br {{.*}} label %for.body.prol.preheader, label %for.body.prol.loopexit, !dbg [[PH_LOC:![0-9]+]]
  ; PROLOG: for.body.prol:
-; PROLOG:   br label %for.body.prol.loopexit, !dbg [[BODY_LOC:![0-9]+]]
+; PROLOG:   br label %for.body.prol.loopexit, !dbg [[PH_LOC:![0-9]+]]
  ; PROLOG: for.body.prol.loopexit:
  ; PROLOG:   br {{.*}} label %for.end.loopexit, label %for.body.preheader.new, !dbg [[PH_LOC]]
  ; PROLOG: for.body:
-; PROLOG:   br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[BODY_LOC]]
+; PROLOG:   br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[PH_LOC]]
  ; PROLOG-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
  
-; PROLOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
-; PROLOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
+; PROLOG-DAG: [[PH_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
  
  define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly !dbg !6 {
  entry:
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll

index 4aa96df94fffde29a5caa6844912b3f6254f6f6c..a90ce99268b37cb52a059b4b301e7a13137b43ee 100644 (file)
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -16,15 +16,15 @@
  ;   }
  ; }
  ; File, line, and column should match those specified in the metadata
-; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
-; CHECK: remark: source.cpp:4:5: loop not vectorized
+; CHECK: remark: source.cpp:5:9: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:5:9: loop not vectorized
  
  ; void test_disabled(int *A, int Length) {
  ; #pragma clang loop vectorize(disable) interleave(disable)
  ;   for (int i = 0; i < Length; i++)
  ;     A[i] = i;
  ; }
-; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized
+; CHECK: remark: source.cpp:12:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized
  
  ; void test_array_bounds(int *A, int *B, int Length) {
  ; #pragma clang loop vectorize(enable)
@@ -51,7 +51,7 @@
  ; YAML:       --- !Analysis
  ; YAML-NEXT: Pass:            loop-vectorize
  ; YAML-NEXT: Name:            CantComputeNumberOfIterations
-; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 4, Column: 5 }
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 5, Column: 9 }
  ; YAML-NEXT: Function:        _Z4testPii
  ; YAML-NEXT: Args:
  ; YAML-NEXT:   - String:          'loop not vectorized: '
@@ -60,7 +60,7 @@
  ; YAML-NEXT: --- !Missed
  ; YAML-NEXT: Pass:            loop-vectorize
  ; YAML-NEXT: Name:            MissedDetails
-; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 4, Column: 5 }
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 5, Column: 9 }
  ; YAML-NEXT: Function:        _Z4testPii
  ; YAML-NEXT: Args:
  ; YAML-NEXT:   - String:          loop not vectorized
@@ -68,7 +68,7 @@
  ; YAML-NEXT: --- !Analysis
  ; YAML-NEXT: Pass:            loop-vectorize
  ; YAML-NEXT: Name:            AllDisabled
-; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 13, Column: 5 }
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 12, Column: 8 }
  ; YAML-NEXT: Function:        _Z13test_disabledPii
  ; YAML-NEXT: Args:
  ; YAML-NEXT:   - String:          'loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll

index ac0648a683897c6cb20d3afd2cc4cdf901ebf4e1..228515efc70123e8a0fb916c81fc48cbbc471b12 100644 (file)
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
@@ -3,8 +3,8 @@
  ; Verify analysis remarks are generated when interleaving is not beneficial.
  ; CHECK: remark: vectorization-remarks-profitable.c:5:17: the cost-model indicates that vectorization is not beneficial
  ; CHECK: remark: vectorization-remarks-profitable.c:5:17: the cost-model indicates that interleaving is not beneficial and is explicitly disabled or interleave count is set to 1
-; CHECK: remark: vectorization-remarks-profitable.c:12:17: the cost-model indicates that vectorization is not beneficial
-; CHECK: remark: vectorization-remarks-profitable.c:12:17: the cost-model indicates that interleaving is not beneficial
+; CHECK: remark: vectorization-remarks-profitable.c:11:3: the cost-model indicates that vectorization is not beneficial
+; CHECK: remark: vectorization-remarks-profitable.c:11:3: the cost-model indicates that interleaving is not beneficial
  
  ; First loop.
  ;  #pragma clang loop interleave(disable) unroll(disable)
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll

index e9ec8662ce46a03b34f60ae49d2b15d4a1117275..358f49b992697fb7e84c470d0566b712d021d662 100644 (file)
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -14,8 +14,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
  ; CHECK:   add i64 %index, 2, !dbg ![[LOC]]
  ; CHECK:   icmp eq i64 %index.next, %n.vec, !dbg ![[LOC]]
  ; CHECK: middle.block
-; CHECK:   add <2 x i32> %{{.*}}, %rdx.shuf, !dbg ![[LOC]]
-; CHECK:   extractelement <2 x i32> %bin.rdx, i32 0, !dbg ![[LOC]]
+; CHECK:   add <2 x i32> %{{.*}}, %rdx.shuf, !dbg ![[BR_LOC:[0-9]+]]
+; CHECK:   extractelement <2 x i32> %bin.rdx, i32 0, !dbg ![[BR_LOC]]
+; CHECK: for.body
+; CHECK: br i1{{.*}}, label %for.body,{{.*}}, !dbg ![[BR_LOC]],
+; CHECK: ![[BR_LOC]] = !DILocation(line: 5,
  
  define i32 @f(i32* nocapture %a, i32 %size) #0 !dbg !4 {
  entry:
@@ -38,7 +41,7 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
    %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !22
    call void @llvm.dbg.value(metadata !{null}, metadata !16, metadata !DIExpression()), !dbg !22
    %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !22
-  %exitcond = icmp ne i32 %lftr.wideiv, %size, !dbg !22
+  %exitcond = icmp ne i32 %lftr.wideiv, %size, !dbg !21
    br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21
  
  for.cond.for.end_crit_edge:                       ; preds = %for.body
diff --git a/test/Transforms/LoopVectorize/fix-reduction-dbg.ll b/test/Transforms/LoopVectorize/fix-reduction-dbg.ll

new file mode 100755 (executable)

index 0000000..457bdac
--- /dev/null
+++ b/test/Transforms/LoopVectorize/fix-reduction-dbg.ll
@@ -0,0 +1,87 @@
+; Confirm that the line numbers for the middle.block operations are all the
+; same as the start of the loop.
+
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=4 <%s | FileCheck %s
+;
+; CHECK: middle.block:
+; CHECK-NEXT: %{{.*}}= add <4 x i32>{{.*}}, !dbg ![[DL:[0-9]+]]
+; CHECK-NEXT: %{{.*}}= add <4 x i32>{{.*}}, !dbg ![[DL]]
+; CHECK-NEXT: %{{.*}}= add <4 x i32>{{.*}}, !dbg ![[DL]]
+; CHECK-NEXT: %{{.*}}= shufflevector <4 x i32>{{.*}}, !dbg ![[DL]]
+; CHECK-NEXT: %{{.*}}= add <4 x i32>{{.*}}, !dbg ![[DL]]
+; CHECK-NEXT: %{{.*}}= shufflevector <4 x i32>{{.*}}, !dbg ![[DL]]
+; CHECK-NEXT: %{{.*}}= add <4 x i32>{{.*}}, !dbg ![[DL]]
+; CHECK-NEXT: %{{.*}}= extractelement <4 x i32>{{.*}}, !dbg ![[DL]]
+; CHECK-NEXT: %{{.*}}= icmp eq i64{{.*}}, !dbg ![[DL]]
+; CHECK-NEXT: br i1 %{{.*}}, !dbg ![[DL]]
+; CHECK: ![[DL]] = !DILocation(line: 5,
+
+; This IR can be generated by running:
+; clang -gmlt -S src.cpp -emit-llvm -mllvm -opt-bisect-limit=56 -O2 -o -
+;
+; Where  src.cpp contains:
+; int foo(int count, int *bar)
+; {
+;   int ret = count;
+;   int tmp;
+;   for (int j = 0; j < count; j++) {
+;     tmp = bar[j];
+;     ret += tmp;
+;   }
+;
+;   return ret;
+; }
+
+define dso_local i32 @"foo"(i32 %count, i32* nocapture readonly %bar) local_unnamed_addr !dbg !8 {
+entry:
+  %cmp8 = icmp sgt i32 %count, 0, !dbg !10
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup, !dbg !10
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %count to i64
+  br label %for.body, !dbg !11
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ], !dbg !12
+  br label %for.cond.cleanup, !dbg !13
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %ret.0.lcssa = phi i32 [ %count, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ], !dbg !14
+  ret i32 %ret.0.lcssa, !dbg !13
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %ret.09 = phi i32 [ %count, %for.body.preheader ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %bar, i64 %indvars.iv, !dbg !11
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !11, !tbaa !15
+  %add = add nsw i32 %0, %ret.09, !dbg !12
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !10
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !10, !llvm.loop !19
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "src.cpp", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"CodeView", i32 1}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!""}
+!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!9 = !DISubroutineType(types: !2)
+!10 = !DILocation(line: 5, scope: !8)
+!11 = !DILocation(line: 6, scope: !8)
+!12 = !DILocation(line: 7, scope: !8)
+!13 = !DILocation(line: 10, scope: !8)
+!14 = !DILocation(line: 0, scope: !8)
+!15 = !{!16, !16, i64 0}
+!16 = !{!"int", !17, i64 0}
+!17 = !{!"omnipotent char", !18, i64 0}
+!18 = !{!"Simple C++ TBAA"}
+!19 = distinct !{!19, !10, !20}
+!20 = !DILocation(line: 8, scope: !8)
diff --git a/test/Transforms/LoopVectorize/unsafe-dep-remark.ll b/test/Transforms/LoopVectorize/unsafe-dep-remark.ll

index 78e128d897e96f7697d5fc9242d9c7d175a700c1..be20be538173b45c68199a19b5fbb8e2322a46b2 100644 (file)
--- a/test/Transforms/LoopVectorize/unsafe-dep-remark.ll
+++ b/test/Transforms/LoopVectorize/unsafe-dep-remark.ll
@@ -11,7 +11,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  ;     5          }
  ;     6        }
  
-; CHECK: remark: /tmp/kk.c:3:16: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK: remark: /tmp/kk.c:2:3: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
  
  define void @success(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) !dbg !6 {
  entry:
author	Orlando Cazalet-Hyams <orlando.hyams@sony.com>
	Tue, 7 May 2019 15:37:38 +0000 (15:37 +0000)
committer	Orlando Cazalet-Hyams <orlando.hyams@sony.com>
	Tue, 7 May 2019 15:37:38 +0000 (15:37 +0000)
lib/Transforms/Utils/BasicBlockUtils.cpp		patch \| blob \| history
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
test/Transforms/LoopSimplify/dbg-loc.ll		patch \| blob \| history
test/Transforms/LoopSimplify/do-preheader-dbg.ll	[new file with mode: 0755]	patch \| blob
test/Transforms/LoopSimplify/for-preheader-dbg.ll	[new file with mode: 0755]	patch \| blob
test/Transforms/LoopUnroll/runtime-loop1.ll		patch \| blob \| history
test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll		patch \| blob \| history
test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll		patch \| blob \| history
test/Transforms/LoopVectorize/debugloc.ll		patch \| blob \| history
test/Transforms/LoopVectorize/fix-reduction-dbg.ll	[new file with mode: 0755]	patch \| blob
test/Transforms/LoopVectorize/unsafe-dep-remark.ll		patch \| blob \| history