From cbd4ff93102479238e293f6ddb688df3c6e9ba47 Mon Sep 17 00:00:00 2001
From: Xinliang David Li
Date: Tue, 30 May 2017 21:22:18 +0000
Subject: [PATCH] [PartialInlining] Shrinkwrap allocas with live range contained in outline region.

Differential Revision: http://reviews.llvm.org/D33618

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304245 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Transforms/Utils/CodeExtractor.h | 13 ++-
 lib/Transforms/Utils/CodeExtractor.cpp        | 83 +++++++++++++++++--
 .../CodeExtractor/PartialInlineAlloca.ll      | 68 +++++++++++++++
 .../CodeExtractor/PartialInlineAlloca2.ll     | 65 +++++++++++++++
 .../CodeExtractor/PartialInlineAlloca4.ll     | 67 +++++++++++++++
 .../CodeExtractor/PartialInlineAlloca5.ll     | 67 +++++++++++++++
 6 files changed, 355 insertions(+), 8 deletions(-)
 create mode 100644 test/Transforms/CodeExtractor/PartialInlineAlloca.ll
 create mode 100644 test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
 create mode 100644 test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
 create mode 100644 test/Transforms/CodeExtractor/PartialInlineAlloca5.ll

diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h
index a602498e5f2..7e23544af1a 100644
--- a/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -25,6 +25,7 @@ template <typename T> class ArrayRef;
   class BranchProbabilityInfo;
   class DominatorTree;
   class Function;
+  class Instruction;
   class Loop;
   class Module;
   class RegionNode;
@@ -103,7 +104,17 @@ template <typename T> class ArrayRef;
     /// a code sequence, that sequence is modified, including changing these
     /// sets, before extraction occurs. These modifications won't have any
     /// significant impact on the cost however.
-    void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const;
+    void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+                           const ValueSet &Allocas) const;
+    /// Find the set of allocas whose live ranges are contained within the
+    /// outlined region.
+    ///
+    /// Allocas which have lifetime markers contained in the outlined region
+    /// should be pushed to the outlined function. The address bitcasts that
+    /// are used by the lifetime markers are also candidates for shrink-
+    /// wrapping. The instructions that need to be sunk are collected in
+    /// 'Allocas'.
+    void findAllocas(ValueSet &Allocas) const;
 
   private:
     void severSplitPHINodes(BasicBlock *&Header);
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index ed72099ec3e..24d28a6c283 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
@@ -141,16 +142,77 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
   return false;
 }
 
-void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
-                                      ValueSet &Outputs) const {
+void CodeExtractor::findAllocas(ValueSet &SinkCands) const {
+  Function *Func = (*Blocks.begin())->getParent();
+  for (BasicBlock &BB : *Func) {
+    if (Blocks.count(&BB))
+      continue;
+    for (Instruction &II : BB) {
+      auto *AI = dyn_cast<AllocaInst>(&II);
+      if (!AI)
+        continue;
+
+      // Returns true if every user of Addr is inside the outlined region and
+      // both a lifetime.start and a lifetime.end marker are among them.
+      auto GetLifeTimeMarkers = [&](Instruction *Addr) {
+        Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
+        for (User *U : Addr->users()) {
+          if (!definedInRegion(Blocks, U))
+            return false;
+
+          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+          if (IntrInst) {
+            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start)
+              LifeStart = IntrInst;
+            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+              LifeEnd = IntrInst;
+          }
+        }
+        return LifeStart && LifeEnd;
+      };
+
+      if (GetLifeTimeMarkers(AI)) {
+        SinkCands.insert(AI);
+        continue;
+      }
+
+      // Otherwise, look for the markers on a pointer cast of the alloca:
+      Instruction *MarkerAddr = nullptr;
+      for (User *U : AI->users()) {
+        if (U->stripPointerCasts() == AI) {
+          Instruction *Bitcast = cast<Instruction>(U);
+          if (GetLifeTimeMarkers(Bitcast)) {
+            MarkerAddr = Bitcast;
+            continue;
+          }
+        }
+        if (!definedInRegion(Blocks, U)) {
+          MarkerAddr = nullptr;
+          break;
+        }
+      }
+      if (MarkerAddr) {
+        if (!definedInRegion(Blocks, MarkerAddr))
+          SinkCands.insert(MarkerAddr);
+        SinkCands.insert(AI);
+      }
+    }
+  }
+}
+
+void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+                                      const ValueSet &SinkCands) const {
+
   for (BasicBlock *BB : Blocks) {
     // If a used value is defined outside the region, it's an input. If an
     // instruction is used outside the region, it's an output.
     for (Instruction &II : *BB) {
       for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
-           ++OI)
-        if (definedInCaller(Blocks, *OI))
-          Inputs.insert(*OI);
+           ++OI) {
+        Value *V = *OI;
+        if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+          Inputs.insert(V);
+      }
 
       for (User *U : II.users())
         if (!definedInRegion(Blocks, U)) {
@@ -718,7 +780,7 @@ Function *CodeExtractor::extractCodeRegion() {
   if (!isEligible())
     return nullptr;
 
-  ValueSet inputs, outputs;
+  ValueSet inputs, outputs, SinkingCands;
 
   // Assumption: this is a single-entry code region, and the header is the first
   // block in the region.
@@ -757,8 +819,15 @@ Function *CodeExtractor::extractCodeRegion() {
                                                "newFuncRoot");
   newFuncRoot->getInstList().push_back(BranchInst::Create(header));
 
+  findAllocas(SinkingCands);
+
   // Find inputs to, outputs from the code region.
-  findInputsOutputs(inputs, outputs);
+  findInputsOutputs(inputs, outputs, SinkingCands);
+
+  // Now sink all instructions which only have non-phi uses inside the region
+  for (auto *II : SinkingCands)
+    cast<Instruction>(II)->moveBefore(*newFuncRoot,
+                                      newFuncRoot->getFirstInsertionPt());
 
   // Calculate the exit blocks for the extracted region and the total exit
   // weights for each of those blocks.
diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca.ll
new file mode 100644
index 00000000000..48db0b61a31
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAlloca.ll
@@ -0,0 +1,68 @@
+
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+ ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+; Function Attrs: nounwind uwtable
+define i32 @callee_sinkable_bitcast(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_sinkable_bitcast.{{[0-9]}}
+; CHECK: alloca
+; CHECK-NEXT: bitcast
+; CHECK: call void @llvm.lifetime
+bb:
+  %tmp = alloca %"class.base", align 4
+  %tmp1 = bitcast %"class.base"* %tmp to i8*
+  %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+  %tmp3 = add nsw i32 %tmp2, 1
+  %tmp4 = icmp slt i32 %arg, 0
+  br i1 %tmp4, label %bb6, label %bb5
+
+bb5:                                              ; preds = %bb
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
+  %tmp11 = bitcast %"class.base"* %tmp to i32*
+  store i32 %tmp3, i32* %tmp11, align 4, !tbaa !2
+  store i32 %tmp3, i32* @g, align 4, !tbaa !2
+  call void @bar(i32* nonnull %tmp11) #2
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb
+  %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+  ret i32 %tmp7
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = tail call i32 @callee_sinkable_bitcast(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
new file mode 100644
index 00000000000..4ca418389e5
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_no_bitcast(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_no_bitcast.{{[0-9]}}
+; CHECK: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+  %tmp = alloca i8, align 4
+  %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+  %tmp3 = add nsw i32 %tmp2, 1
+  %tmp4 = icmp slt i32 %arg, 0
+  br i1 %tmp4, label %bb6, label %bb5
+
+bb5:                                              ; preds = %bb
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
+  store i32 %tmp3, i32* @g, align 4, !tbaa !2
+  %tmp11 = bitcast i8 * %tmp to i32*
+  call void @bar(i32* nonnull %tmp11) #2
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb
+  %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+  ret i32 %tmp7
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = tail call i32 @callee_no_bitcast(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+
diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
new file mode 100644
index 00000000000..6bb38d44f46
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_unknown_use1(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_unknown_use1.{{[0-9]}}
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+  %tmp = alloca i8, align 4
+  %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+  %tmp3 = add nsw i32 %tmp2, 1
+  %tmp4 = icmp slt i32 %arg, 0
+  br i1 %tmp4, label %bb6, label %bb5
+
+bb5:                                              ; preds = %bb
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
+  store i32 %tmp3, i32* @g, align 4, !tbaa !2
+  %tmp11 = bitcast i8* %tmp to i32*
+  call void @bar(i32* nonnull %tmp11) #2
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb
+  %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+  %tmp1 = bitcast i8* %tmp to i32*
+  ret i32 %tmp7
+}
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = tail call i32 @callee_unknown_use1(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+
diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll
new file mode 100644
index 00000000000..9c53496e1ce
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_unknown_use2(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_unknown_use2.{{[0-9]}}
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+  %tmp = alloca i32, align 4
+  %tmp1 = bitcast i32* %tmp to i8*
+  %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+  %tmp3 = add nsw i32 %tmp2, 1
+  %tmp4 = icmp slt i32 %arg, 0
+  br i1 %tmp4, label %bb6, label %bb5
+
+bb5:                                              ; preds = %bb
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
+  store i32 %tmp3, i32* %tmp, align 4, !tbaa !2
+  store i32 %tmp3, i32* @g, align 4, !tbaa !2
+  call void @bar(i32* nonnull %tmp) #2
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb
+  %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+  %tmp10 = bitcast i8* %tmp1 to i32*
+  ret i32 %tmp7
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = tail call i32 @callee_unknown_use2(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+
-- 
2.40.0