From: Vedant Kumar Date: Fri, 4 Jan 2019 17:43:22 +0000 (+0000) Subject: [CodeExtractor] Do not extract unsafe lifetime markers X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bbbee1ff5049d7e299b1f5aa987ae7aeb4207c82;p=llvm [CodeExtractor] Do not extract unsafe lifetime markers Lifetime markers which reference inputs to the extraction region are not safe to extract. Example ('rhs' will be extracted): ``` entry: +------------+ | x = alloca | | y = alloca | +------------+ / \ lhs: rhs: +-------------------+ +-------------------+ | lifetime_start(x) | | lifetime_start(x) | | use(x) | | lifetime_start(y) | | lifetime_end(x) | | use(x, y) | | lifetime_start(y) | | lifetime_end(y) | | use(y) | | lifetime_end(x) | | lifetime_end(y) | +-------------------+ +-------------------+ ``` Prior to extraction, the stack coloring pass sees that the slots for 'x' and 'y' are in-use at the same time. After extraction, the coloring pass infers that 'x' and 'y' are *not* in-use concurrently, because markers from 'rhs' are no longer available to help decide otherwise. This leads to a miscompile, because the stack slots actually are in-use concurrently in the extracted function. Fix this by moving lifetime start/end markers for memory regions defined in the calling function around the call to the extracted function. Fixes llvm.org/PR39671 (rdar://45939472). Differential Revision: https://reviews.llvm.org/D55967 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350420 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h index 498ff5a7883..fee79fdc3bf 100644 --- a/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/include/llvm/Transforms/Utils/CodeExtractor.h @@ -27,6 +27,7 @@ class BasicBlock; class BlockFrequency; class BlockFrequencyInfo; class BranchProbabilityInfo; +class CallInst; class DominatorTree; class Function; class Instruction; @@ -164,10 +165,9 @@ class Value; DenseMap &ExitWeights, BranchProbabilityInfo *BPI); - void emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *newHeader, - ValueSet &inputs, - ValueSet &outputs); + CallInst *emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *newHeader, + ValueSet &inputs, ValueSet &outputs); }; } // end namespace llvm diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 79904e206a0..25d4ae583ec 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -883,9 +883,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, /// emitCallAndSwitchStatement - This method sets up the caller side by adding /// the call instruction, splitting any PHI nodes in the header block as /// necessary. -void CodeExtractor:: -emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, - ValueSet &inputs, ValueSet &outputs) { +CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *codeReplacer, + ValueSet &inputs, + ValueSet &outputs) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs std::vector params, StructValues, ReloadOutputs, Reloads; @@ -893,6 +894,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); const DataLayout &DL = M->getDataLayout(); + CallInst *call = nullptr; // Add inputs as params, or to be filled into the struct for (Value *input : inputs) @@ -943,8 +945,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } // Emit the call to the function - CallInst *call = CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : ""); + call = CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : ""); // Add debug location to the new call, if the original function has debug // info. In that case, the terminator of the entry block of the extracted // function contains the first debug location of the extracted function, @@ -1116,6 +1118,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); break; } + + return call; } void CodeExtractor::moveCodeToFunction(Function *newFunction) { @@ -1177,6 +1181,71 @@ void CodeExtractor::calculateNewCallTerminatorWeights( MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); } +/// Scan the extraction region for lifetime markers which reference inputs. +/// Erase these markers. Return the inputs which were referenced. +/// +/// The extraction region is defined by a set of blocks (\p Blocks), and a set +/// of allocas which will be moved from the caller function into the extracted +/// function (\p SunkAllocas). +static SetVector +eraseLifetimeMarkersOnInputs(const SetVector &Blocks, + const SetVector &SunkAllocas) { + SetVector InputObjectsWithLifetime; + for (BasicBlock *BB : Blocks) { + for (auto It = BB->begin(), End = BB->end(); It != End;) { + auto *II = dyn_cast(&*It); + ++It; + if (!II || !II->isLifetimeStartOrEnd()) + continue; + + // Get the memory operand of the lifetime marker. If the underlying + // object is a sunk alloca, or is otherwise defined in the extraction + // region, the lifetime marker must not be erased. + Value *Mem = II->getOperand(1)->stripInBoundsOffsets(); + if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem)) + continue; + + InputObjectsWithLifetime.insert(Mem); + II->eraseFromParent(); + } + } + return InputObjectsWithLifetime; +} + +/// Insert lifetime start/end markers surrounding the call to the new function +/// for objects defined in the caller. +static void insertLifetimeMarkersSurroundingCall( + Module *M, const SetVector &InputObjectsWithLifetime, + CallInst *TheCall) { + if (InputObjectsWithLifetime.empty()) + return; + + LLVMContext &Ctx = M->getContext(); + auto Int8PtrTy = Type::getInt8PtrTy(Ctx); + auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1); + auto LifetimeStartFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_start, Int8PtrTy); + auto LifetimeEndFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_end, Int8PtrTy); + for (Value *Mem : InputObjectsWithLifetime) { + assert((!isa(Mem) || + cast(Mem)->getFunction() == TheCall->getFunction()) && + "Input memory not defined in original function"); + Value *MemAsI8Ptr = nullptr; + if (Mem->getType() == Int8PtrTy) + MemAsI8Ptr = Mem; + else + MemAsI8Ptr = + CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); + + auto StartMarker = + CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr}); + StartMarker->insertBefore(TheCall); + auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr}); + EndMarker->insertAfter(TheCall); + } +} + Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; @@ -1291,11 +1360,17 @@ Function *CodeExtractor::extractCodeRegion() { cast(II)->moveBefore(TI); } + // Collect objects which are inputs to the extraction region and also + // referenced by lifetime start/end markers within it. The effects of these + // markers must be replicated in the calling function to prevent the stack + // coloring pass from merging slots which store input objects. + ValueSet InputObjectsWithLifetime = + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands); + // Construct new function based on inputs/outputs & add allocas for all defs. - Function *newFunction = constructFunction(inputs, outputs, header, - newFuncRoot, - codeReplacer, oldFunction, - oldFunction->getParent()); + Function *newFunction = + constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer, + oldFunction, oldFunction->getParent()); // Update the entry count of the function. if (BFI) { @@ -1306,10 +1381,16 @@ Function *CodeExtractor::extractCodeRegion() { BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); } - emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); + CallInst *TheCall = + emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); moveCodeToFunction(newFunction); + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. + insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), + InputObjectsWithLifetime, TheCall); + // Propagate personality info to the new function if there is one. if (oldFunction->hasPersonalityFn()) newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll index 6bb38d44f46..04789eaad29 100644 --- a/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll +++ b/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll @@ -6,10 +6,14 @@ @g = external local_unnamed_addr global i32, align 4 +; CHECK-LABEL: define{{.*}}@caller( +; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %tmp.i) +; CHECK-NEXT: call void @callee_unknown_use1.{{.*}}(i8* %tmp.i +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %tmp.i) + define i32 @callee_unknown_use1(i32 %arg) local_unnamed_addr #0 { ; CHECK-LABEL:define{{.*}}@callee_unknown_use1.{{[0-9]}} ; CHECK-NOT: alloca -; CHECK: call void @llvm.lifetime bb: %tmp = alloca i8, align 4 %tmp2 = load i32, i32* @g, align 4, !tbaa !2 diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll index 9c53496e1ce..0bde58fbccd 100644 --- a/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll +++ b/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll @@ -9,7 +9,6 @@ define i32 @callee_unknown_use2(i32 %arg) local_unnamed_addr #0 { ; CHECK-LABEL:define{{.*}}@callee_unknown_use2.{{[0-9]}} ; CHECK-NOT: alloca -; CHECK: call void @llvm.lifetime bb: %tmp = alloca i32, align 4 %tmp1 = bitcast i32* %tmp to i8* diff --git a/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll b/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll new file mode 100644 index 00000000000..c6482f823b4 --- /dev/null +++ b/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll @@ -0,0 +1,66 @@ +; RUN: opt -S -hotcoldsplit < %s 2>&1 | FileCheck %s + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +declare void @use(i8*) + +declare void @cold_use2(i8*, i8*) cold + +; CHECK-LABEL: define {{.*}}@foo( +define void @foo() { +entry: + %local1 = alloca i256 + %local2 = alloca i256 + %local1_cast = bitcast i256* %local1 to i8* + %local2_cast = bitcast i256* %local2 to i8* + br i1 undef, label %normalPath, label %outlinedPath + +normalPath: + ; These two uses of stack slots are non-overlapping. Based on this alone, + ; the stack slots could be merged. + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local1_cast) + call void @use(i8* %local1_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) + call void @use(i8* %local2_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) + ret void + +; CHECK-LABEL: codeRepl: +; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8* +; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]]) +; CHECK: [[local2_cast:%.*]] = bitcast i256* %local2 to i8* +; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]]) +; CHECK: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast) +; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]]) +; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]]) +; CHECK: br i1 + +outlinedPath: + ; These two uses of stack slots are overlapping. This should prevent + ; merging of stack slots. CodeExtractor must replicate the effects of + ; these markers in the caller to inhibit stack coloring. + %gep1 = getelementptr inbounds i8, i8* %local1_cast, i64 1 + call void @llvm.lifetime.start.p0i8(i64 1, i8* %gep1) + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) + call void @cold_use2(i8* %local1_cast, i8* %local2_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %gep1) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) + br i1 undef, label %outlinedPath2, label %outlinedPathExit + +outlinedPath2: + ; These extra lifetime markers are used to test that we emit only one + ; pair of guard markers in the caller per memory object. + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) + call void @use(i8* %local2_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) + ret void + +outlinedPathExit: + ret void +} + +; CHECK-LABEL: define {{.*}}@foo.cold.1( +; CHECK-NOT: @llvm.lifetime