From 0edfaf6dd292069ccffe888f89feb1471df43c8a Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Wed, 10 Jul 2019 16:32:20 +0000 Subject: [PATCH] [CodeExtractor] Fix sinking of allocas with multiple bitcast uses (PR42451) An alloca which can be sunk into the extraction region may have more than one bitcast use. Move these uses along with the alloca to prevent use-before-def. Testing: check-llvm, stage2 build of clang Fixes llvm.org/PR42451. Differential Revision: https://reviews.llvm.org/D64463 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365660 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/CodeExtractor.cpp | 42 +++++++---- .../CodeExtractor/live_shrink_multiple.ll | 2 +- ...nk-multiple-bitcasts-of-allocas-pr42451.ll | 74 +++++++++++++++++++ 3 files changed, 104 insertions(+), 14 deletions(-) create mode 100644 test/Transforms/HotColdSplit/sink-multiple-bitcasts-of-allocas-pr42451.ll diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index f70b09c7cad..27064d590bb 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -519,18 +519,20 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, if (Bitcasts.empty()) continue; - Instruction *BitcastAddr = Bitcasts.back(); - const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo.back(); - assert(LMI.LifeStart && - "Unsafe to sink bitcast without lifetime markers"); - moveOrIgnoreLifetimeMarkers(LMI); - if (!definedInRegion(Blocks, BitcastAddr)) { - LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr - << "\n"); - SinkCands.insert(BitcastAddr); - } LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); SinkCands.insert(AI); + for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { + Instruction *BitcastAddr = Bitcasts[I]; + const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; + assert(LMI.LifeStart && + "Unsafe to sink bitcast without lifetime markers"); + 
moveOrIgnoreLifetimeMarkers(LMI); + if (!definedInRegion(Blocks, BitcastAddr)) { + LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr + << "\n"); + SinkCands.insert(BitcastAddr); + } + } } } } @@ -1431,9 +1433,23 @@ Function *CodeExtractor::extractCodeRegion() { findInputsOutputs(inputs, outputs, SinkingCands); // Now sink all instructions which only have non-phi uses inside the region. - for (auto *II : SinkingCands) - cast<Instruction>(II)->moveBefore(*newFuncRoot, - newFuncRoot->getFirstInsertionPt()); + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. + AllocaInst *FirstSunkAlloca = nullptr; + for (auto *II : SinkingCands) { + if (auto *AI = dyn_cast<AllocaInst>(II)) { + AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + if (!FirstSunkAlloca) + FirstSunkAlloca = AI; + } + } + assert((SinkingCands.empty() || FirstSunkAlloca) && + "Did not expect a sink candidate without any allocas"); + for (auto *II : SinkingCands) { + if (!isa<AllocaInst>(II)) { + cast<Instruction>(II)->moveAfter(FirstSunkAlloca); + } + } if (!HoistingCands.empty()) { auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); diff --git a/test/Transforms/CodeExtractor/live_shrink_multiple.ll b/test/Transforms/CodeExtractor/live_shrink_multiple.ll index 9350ca2ef9c..3f4c9472b77 100644 --- a/test/Transforms/CodeExtractor/live_shrink_multiple.ll +++ b/test/Transforms/CodeExtractor/live_shrink_multiple.ll @@ -45,9 +45,9 @@ bb: ; CHECK-LABEL: define internal void @_Z3foov.1. 
; CHECK: newFuncRoot: ; CHECK-NEXT: alloca +; CHECK-NEXT: alloca ; CHECK-NEXT: bitcast ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8 -; CHECK-NEXT: alloca ; CHECK-NEXT: bitcast ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8 ; CHECK: call void @llvm.lifetime.end.p0i8 diff --git a/test/Transforms/HotColdSplit/sink-multiple-bitcasts-of-allocas-pr42451.ll b/test/Transforms/HotColdSplit/sink-multiple-bitcasts-of-allocas-pr42451.ll new file mode 100644 index 00000000000..d2f8398f322 --- /dev/null +++ b/test/Transforms/HotColdSplit/sink-multiple-bitcasts-of-allocas-pr42451.ll @@ -0,0 +1,74 @@ +; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=-1 -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@c = common global i32 0, align 4 +@h = common global i32 0, align 4 + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1 + +declare i32* @m() + +; CHECK-LABEL: define void @main() +; CHECK-NEXT: %.sroa.4.i = alloca [20 x i8], align 2 +; CHECK-NEXT: %.sroa.5.i = alloca [6 x i8], align 8 +; CHECK-NEXT: %1 = bitcast [6 x i8]* %.sroa.5.i to i8* + +define void @main() #0 { + %.sroa.4.i = alloca [20 x i8], align 2 + %.sroa.5.i = alloca [6 x i8], align 8 + %1 = bitcast [6 x i8]* %.sroa.5.i to i8* + %2 = load i32, i32* @h, align 4, !tbaa !4 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %12, label %4 + +4: ; preds = %0 + %5 = call i32* @m() #3 + %.sroa.4.0..sroa_idx21.i = getelementptr inbounds [20 x i8], [20 x i8]* %.sroa.4.i, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 20, i8* %.sroa.4.0..sroa_idx21.i) #3 + %.sroa.5.0..sroa_idx16.i = getelementptr inbounds [6 x i8], [6 x i8]* %.sroa.5.i, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 6, i8* %.sroa.5.0..sroa_idx16.i) #3 + call void @llvm.memset.p0i8.i64(i8* 
align 2 %.sroa.4.0..sroa_idx21.i, i8 0, i64 20, i1 false) #3 + call void @llvm.memset.p0i8.i64(i8* align 8 %.sroa.5.0..sroa_idx16.i, i8 0, i64 6, i1 false) #3 + %6 = load i32, i32* @c, align 4, !tbaa !4 + %7 = trunc i32 %6 to i16 + call void @llvm.lifetime.end.p0i8(i64 20, i8* %.sroa.4.0..sroa_idx21.i) #3 + call void @llvm.lifetime.end.p0i8(i64 6, i8* %.sroa.5.0..sroa_idx16.i) #3 + call void @llvm.lifetime.start.p0i8(i64 6, i8* %1) #3 + call void @llvm.memset.p0i8.i64(i8* align 1 %1, i8 3, i64 6, i1 false) + br label %8 + +8: ; preds = %8, %4 + %.0.i = phi i32 [ 0, %4 ], [ %10, %8 ] + %9 = sext i32 %.0.i to i64 + %10 = add nsw i32 %.0.i, 1 + %11 = icmp slt i32 %10, 6 + br i1 %11, label %8, label %l.exit + +l.exit: ; preds = %8 + call void @llvm.lifetime.end.p0i8(i64 6, i8* %1) #3 + br label %12 + +12: ; preds = %l.exit, %0 + %13 = phi i1 [ true, %0 ], [ true, %l.exit ] + ret void +} + +attributes #0 = { cold } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 14]} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"PIC Level", i32 2} +!3 = !{!"Apple clang version 11.0.0 (clang-1100.0.20.17)"} +!4 = !{!5, !5, i64 0} +!5 = !{!"int", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} -- 2.50.1