From: Daniel Berlin Date: Sat, 7 Jan 2017 16:55:14 +0000 (+0000) Subject: NewGVN: Make sure we properly lookup operand leaders while creating X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a783424031b67ed77486393a165718073594bac0;p=llvm NewGVN: Make sure we properly lookup operand leaders while creating congruence classes for stores, and then keep them up to date. Add testcases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291351 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 0f782333a1a..0f265fd6e31 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -337,8 +337,11 @@ private: // New instruction creation. void handleNewInstruction(Instruction *){}; + + // Various instruction touch utilities void markUsersTouched(Value *); void markMemoryUsersTouched(MemoryAccess *); + void markLeaderChangeTouched(CongruenceClass *CC); // Utilities. void cleanupTables(); @@ -1022,11 +1025,22 @@ void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) { } } +// Touch the instructions that need to be updated after a congruence class has a +// leader change, and mark changed values. +void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) { + for (auto M : CC->Members) { + if (auto *I = dyn_cast(M)) + TouchedInstructions.set(InstrDFS[I]); + ChangedValues.insert(M); + } +} + // Perform congruence finding on a given value numbering expression. void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { ValueToExpression[V] = E; // This is guaranteed to return something, since it will at least find // INITIAL. + CongruenceClass *VClass = ValueToClass[V]; assert(VClass && "Should have found a vclass"); // Dead classes should have been eliminated from the mapping. @@ -1045,14 +1059,17 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { place->second = NewClass; // Constants and variables should always be made the leader. - if (const auto *CE = dyn_cast(E)) + if (const auto *CE = dyn_cast(E)) { NewClass->RepLeader = CE->getConstantValue(); - else if (const auto *VE = dyn_cast(E)) - NewClass->RepLeader = VE->getVariableValue(); - else if (const auto *SE = dyn_cast(E)) - NewClass->RepLeader = SE->getStoreInst()->getValueOperand(); - else + } else if (const auto *SE = dyn_cast(E)) { + StoreInst *SI = SE->getStoreInst(); + NewClass->RepLeader = + lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); + } else { NewClass->RepLeader = V; + } + assert(!isa(E) && + "VariableExpression should have been handled already"); EClass = NewClass; DEBUG(dbgs() << "Created new congruence class for " << *V @@ -1091,14 +1108,11 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { ExpressionToClass.erase(VClass->DefiningExpr); } } else if (VClass->RepLeader == V) { - // FIXME: When the leader changes, the value numbering of - // everything may change, so we need to reprocess. + // When the leader changes, the value numbering of + // everything may change due to symbolization changes, so we need to + // reprocess. VClass->RepLeader = *(VClass->Members.begin()); - for (auto M : VClass->Members) { - if (auto *I = dyn_cast(M)) - TouchedInstructions.set(InstrDFS[I]); - ChangedValues.insert(M); - } + markLeaderChangeTouched(VClass); } } @@ -1120,6 +1134,27 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { markMemoryUsersTouched(MA); } } + } else if (StoreInst *SI = dyn_cast(V)) { + // There is, sadly, one complicating thing for stores. Stores do not + // produce values, only consume them. However, in order to make loads and + // stores value number the same, we ignore the value operand of the store. + // But the value operand will still be the leader of our class, and thus, it + // may change. Because the store is a use, the store will get reprocessed, + // but nothing will change about it, and so nothing above will catch it + // (since the class will not change). In order to make sure everything ends + // up okay, we need to recheck the leader of the class. Since stores of + // different values value number differently due to different memorydefs, we + // are guaranteed the leader is always the same between stores in the same + // class. + DEBUG(dbgs() << "Checking store leader\n"); + auto ProperLeader = + lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); + if (EClass->RepLeader != ProperLeader) { + DEBUG(dbgs() << "Store leader changed, fixing\n"); + EClass->RepLeader = ProperLeader; + markLeaderChangeTouched(EClass); + markMemoryUsersTouched(MSSA->getMemoryAccess(SI)); + } } } diff --git a/test/Transforms/NewGVN/basic-cyclic-opt.ll b/test/Transforms/NewGVN/basic-cyclic-opt.ll new file mode 100644 index 00000000000..523ed2612e3 --- /dev/null +++ b/test/Transforms/NewGVN/basic-cyclic-opt.ll @@ -0,0 +1,235 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +;; Function Attrs: nounwind ssp uwtable +;; We should eliminate the sub, and one of the phi nodes +define void @vnum_test1(i32* %data) #0 { +; CHECK-LABEL: @vnum_test1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[M_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB17:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP18:%.*]], [[BB17]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB19:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 [[TMP9]] +; CHECK-NEXT: store i32 2, i32* [[TMP10]], align 4 +; CHECK-NEXT: store i32 0, i32* [[DATA]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP15]] = add nsw i32 [[M_0]], [[TMP14]] +; CHECK-NEXT: br label [[BB17]] +; CHECK: bb17: +; CHECK-NEXT: [[TMP18]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb19: +; CHECK-NEXT: ret void +; +bb: + %tmp = getelementptr inbounds i32, i32* %data, i64 3 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = getelementptr inbounds i32, i32* %data, i64 4 + %tmp3 = load i32, i32* %tmp2, align 4 + br label %bb4 + +bb4: ; preds = %bb17, %bb + %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp15, %bb17 ] + %i.0 = phi i32 [ 0, %bb ], [ %tmp18, %bb17 ] + %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp16, %bb17 ] + %tmp5 = icmp slt i32 %i.0, %tmp1 + br i1 %tmp5, label %bb6, label %bb19 + +bb6: ; preds = %bb4 + %tmp7 = getelementptr inbounds i32, i32* %data, i64 2 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp9 = sext i32 %tmp8 to i64 + %tmp10 = getelementptr inbounds i32, i32* %data, i64 %tmp9 + store i32 2, i32* %tmp10, align 4 + %tmp11 = sub nsw i32 %m.0, %n.0 + %tmp12 = getelementptr inbounds i32, i32* %data, i64 0 + store i32 %tmp11, i32* %tmp12, align 4 + %tmp13 = getelementptr inbounds i32, i32* %data, i64 1 + %tmp14 = load i32, i32* %tmp13, align 4 + %tmp15 = add nsw i32 %m.0, %tmp14 + %tmp16 = add nsw i32 %n.0, %tmp14 + br label %bb17 + +bb17: ; preds = %bb6 + %tmp18 = add nsw i32 %i.0, 1 + br label %bb4 + +bb19: ; preds = %bb4 + ret void +} + +;; Function Attrs: nounwind ssp uwtable +;; We should eliminate the sub, one of the phi nodes, prove the store of the sub +;; and the load of data are equivalent, that the load always produces constant 0, and +;; delete the load replacing it with constant 0. +define i32 @vnum_test2(i32* %data) #0 { +; CHECK-LABEL: @vnum_test2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[M_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB19:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP20:%.*]], [[BB19]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB21:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 [[TMP9]] +; CHECK-NEXT: store i32 2, i32* [[TMP10]], align 4 +; CHECK-NEXT: store i32 0, i32* [[DATA]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP15]] = add nsw i32 [[M_0]], [[TMP14]] +; CHECK-NEXT: br label [[BB19]] +; CHECK: bb19: +; CHECK-NEXT: [[TMP20]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb21: +; CHECK-NEXT: ret i32 0 +; +bb: + %tmp = getelementptr inbounds i32, i32* %data, i64 3 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = getelementptr inbounds i32, i32* %data, i64 4 + %tmp3 = load i32, i32* %tmp2, align 4 + br label %bb4 + +bb4: ; preds = %bb19, %bb + %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp15, %bb19 ] + %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp16, %bb19 ] + %i.0 = phi i32 [ 0, %bb ], [ %tmp20, %bb19 ] + %p.0 = phi i32 [ undef, %bb ], [ %tmp18, %bb19 ] + %tmp5 = icmp slt i32 %i.0, %tmp1 + br i1 %tmp5, label %bb6, label %bb21 + +bb6: ; preds = %bb4 + %tmp7 = getelementptr inbounds i32, i32* %data, i64 2 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp9 = sext i32 %tmp8 to i64 + %tmp10 = getelementptr inbounds i32, i32* %data, i64 %tmp9 + store i32 2, i32* %tmp10, align 4 + %tmp11 = sub nsw i32 %m.0, %n.0 + %tmp12 = getelementptr inbounds i32, i32* %data, i64 0 + store i32 %tmp11, i32* %tmp12, align 4 + %tmp13 = getelementptr inbounds i32, i32* %data, i64 1 + %tmp14 = load i32, i32* %tmp13, align 4 + %tmp15 = add nsw i32 %m.0, %tmp14 + %tmp16 = add nsw i32 %n.0, %tmp14 + %tmp17 = getelementptr inbounds i32, i32* %data, i64 0 + %tmp18 = load i32, i32* %tmp17, align 4 + br label %bb19 + +bb19: ; preds = %bb6 + %tmp20 = add nsw i32 %i.0, 1 + br label %bb4 + +bb21: ; preds = %bb4 + ret i32 %p.0 +} + + +; Function Attrs: nounwind ssp uwtable +;; Same as test 2, with a conditional store of m-n, so it has to also discover +;; that data ends up with the same value no matter what branch is taken. +define i32 @vnum_test3(i32* %data) #0 { +; CHECK-LABEL: @vnum_test3( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[N_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP19:%.*]], [[BB21:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP22:%.*]], [[BB21]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB23:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 5 +; CHECK-NEXT: store i32 0, i32* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[I_0]], 30 +; CHECK-NEXT: br i1 [[TMP10]], label [[BB11:%.*]], label [[BB14:%.*]] +; CHECK: bb11: +; CHECK-NEXT: store i32 0, i32* [[TMP9]], align 4 +; CHECK-NEXT: br label [[BB14]] +; CHECK: bb14: +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +; CHECK-NEXT: [[TMP19]] = add nsw i32 [[N_0]], [[TMP18]] +; CHECK-NEXT: br label [[BB21]] +; CHECK: bb21: +; CHECK-NEXT: [[TMP22]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb23: +; CHECK-NEXT: ret i32 0 +; +bb: + %tmp = getelementptr inbounds i32, i32* %data, i64 3 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = getelementptr inbounds i32, i32* %data, i64 4 + %tmp3 = load i32, i32* %tmp2, align 4 + br label %bb4 + +bb4: ; preds = %bb21, %bb + %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp20, %bb21 ] + %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp19, %bb21 ] + %p.0 = phi i32 [ 0, %bb ], [ %tmp16, %bb21 ] + %i.0 = phi i32 [ 0, %bb ], [ %tmp22, %bb21 ] + %tmp5 = icmp slt i32 %i.0, %tmp1 + br i1 %tmp5, label %bb6, label %bb23 + +bb6: ; preds = %bb4 + %tmp7 = getelementptr inbounds i32, i32* %data, i64 2 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp9 = getelementptr inbounds i32, i32* %data, i64 5 + store i32 0, i32* %tmp9, align 4 + %tmp10 = icmp slt i32 %i.0, 30 + br i1 %tmp10, label %bb11, label %bb14 + +bb11: ; preds = %bb6 + %tmp12 = sub nsw i32 %m.0, %n.0 + %tmp13 = getelementptr inbounds i32, i32* %data, i64 5 + store i32 %tmp12, i32* %tmp13, align 4 + br label %bb14 + +bb14: ; preds = %bb11, %bb6 + %tmp15 = getelementptr inbounds i32, i32* %data, i64 5 + %tmp16 = load i32, i32* %tmp15, align 4 + %tmp17 = getelementptr inbounds i32, i32* %data, i64 1 + %tmp18 = load i32, i32* %tmp17, align 4 + %tmp19 = add nsw i32 %m.0, %tmp18 + %tmp20 = add nsw i32 %n.0, %tmp18 + br label %bb21 + +bb21: ; preds = %bb14 + %tmp22 = add nsw i32 %i.0, 1 + br label %bb4 + +bb23: ; preds = %bb4 + ret i32 %p.0 +} + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0, !0, !0} + +!0 = !{!"Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM 3.5svn)"} diff --git a/test/Transforms/NewGVN/memory-handling.ll b/test/Transforms/NewGVN/memory-handling.ll new file mode 100644 index 00000000000..a0c4a998b8b --- /dev/null +++ b/test/Transforms/NewGVN/memory-handling.ll @@ -0,0 +1,195 @@ +;; This test is really dependent on propagating a lot of memory info around, but in the end, not +;; screwing up a single add. +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.Letter = type { i32, i32, i32, i32 } + +@alPhrase = external local_unnamed_addr global [26 x %struct.Letter], align 16 +@aqMainMask = external local_unnamed_addr global [2 x i64], align 16 +@aqMainSign = external local_unnamed_addr global [2 x i64], align 16 +@cchPhraseLength = external local_unnamed_addr global i32, align 4 +@auGlobalFrequency = external local_unnamed_addr global [26 x i32], align 16 +@.str.7 = external hidden unnamed_addr constant [28 x i8], align 1 + +; Function Attrs: nounwind uwtable +declare void @Fatal(i8*, i32) local_unnamed_addr #0 + +; Function Attrs: nounwind readnone +declare i16** @__ctype_b_loc() local_unnamed_addr #1 + +; Function Attrs: nounwind uwtable +define void @BuildMask(i8* nocapture readonly) local_unnamed_addr #0 { + tail call void @llvm.memset.p0i8.i64(i8* bitcast ([26 x %struct.Letter]* @alPhrase to i8*), i8 0, i64 416, i32 16, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* bitcast ([2 x i64]* @aqMainMask to i8*), i8 0, i64 16, i32 16, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* bitcast ([2 x i64]* @aqMainSign to i8*), i8 0, i64 16, i32 16, i1 false) + br label %.sink.split + +.sink.split: ; preds = %14, %1 + %.0 = phi i8* [ %0, %1 ], [ %.lcssa67, %14 ] + %.sink = phi i32 [ 0, %1 ], [ %23, %14 ] + store i32 %.sink, i32* @cchPhraseLength, align 4, !tbaa !1 + br label %2 + +;