From: Reid Kleckner Date: Wed, 13 Sep 2017 01:43:25 +0000 (+0000) Subject: [InstCombine] Add a flag to disable LowerDbgDeclare X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5b3b7ed35de875b442a03c006004c411193b4c2d;p=llvm [InstCombine] Add a flag to disable LowerDbgDeclare Summary: This should improve optimized debug info for address-taken variables at the cost of inaccurate debug info in some situations. We patched this into clang and deployed this change to Chromium developers, and this significantly improved debuggability of optimized code. The long-term solution to PR34136 seems more and more like it's going to take a while, so I would like to commit this change under a flag so that it can be used as a stop-gap measure. This flag should really help for C++ aggregates like std::string and std::vector, which are typically address-taken, even after inlining, and cannot be SROA-ed. Reviewers: aprantl, dblaikie, probinson, dberlin Subscribers: hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D36596 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313108 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 1881f78255b..8f2b44036f6 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -52,6 +52,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -91,6 +92,16 @@ static cl::opt MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine")); +// FIXME: Remove this flag when it is no longer necessary to convert +// llvm.dbg.declare to avoid inaccurate debug info. 
Setting this to false +// increases variable availability at the cost of accuracy. Variables that +// cannot be promoted by mem2reg or SROA will be described as living in memory +// for their entire lifetime. However, passes like DSE and instcombine can +// delete stores to the alloca, leading to misleading and inaccurate debug +// information. This flag can be removed when those passes are fixed. +static cl::opt ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", + cl::Hidden, cl::init(true)); + Value *InstCombiner::EmitGEPOffset(User *GEP) { return llvm::EmitGEPOffset(&Builder, DL, GEP); } @@ -2092,6 +2103,16 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. SmallVector Users; + + // If we are removing an alloca with a dbg.declare, insert dbg.value calls + // before each store. + DbgDeclareInst *DDI = nullptr; + std::unique_ptr DIB; + if (isa(MI)) { + DDI = FindAllocaDbgDeclare(&MI); + DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); + } + if (isAllocSiteRemovable(&MI, Users, &TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { // Lowering all @llvm.objectsize calls first because they may @@ -2124,6 +2145,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { } else if (isa(I) || isa(I) || isa(I)) { replaceInstUsesWith(*I, UndefValue::get(I->getType())); + } else if (DDI && isa(I)) { + ConvertDebugDeclareToDebugValue(DDI, cast(I), *DIB); } eraseInstFromFunction(*I); } @@ -2135,6 +2158,10 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), None, "", II->getParent()); } + + if (DDI) + eraseInstFromFunction(*DDI); + return eraseInstFromFunction(MI); } return nullptr; @@ -3188,7 +3215,9 @@ static bool combineInstructionsOverFunction( // Lower dbg.declare intrinsics otherwise their value may be clobbered // by instcombiner. 
- bool MadeIRChange = LowerDbgDeclare(F); + bool MadeIRChange = false; + if (ShouldLowerDbgDeclare) + MadeIRChange = LowerDbgDeclare(F); // Iterate while there is work to do. int Iteration = 0; diff --git a/test/Transforms/InstCombine/debuginfo-skip.ll b/test/Transforms/InstCombine/debuginfo-skip.ll index e7bd20ac9b0..56c5f425296 100644 --- a/test/Transforms/InstCombine/debuginfo-skip.ll +++ b/test/Transforms/InstCombine/debuginfo-skip.ll @@ -1,12 +1,5 @@ -; RUN: opt < %s -instcombine -debug -S -o %t 2>&1 | FileCheck %s -; RUN: cat %t | FileCheck %s --check-prefix=CHECK-IR -; REQUIRES: asserts - -; Debug output from InstCombine should not have any @llvm.dbg.* instructions visited -; CHECK-NOT: call void @llvm.dbg. - -; The resulting IR should still have them -; CHECK-IR: call void @llvm.dbg. +; RUN: opt -instcombine-lower-dbg-declare=0 < %s -instcombine -S | FileCheck %s +; RUN: opt -instcombine-lower-dbg-declare=1 < %s -instcombine -S | FileCheck %s define i32 @foo(i32 %j) #0 !dbg !7 { entry: @@ -18,6 +11,14 @@ entry: ret i32 %0, !dbg !15 } +; Instcombine can remove the alloca and forward the load to store, but it +; should convert the declare to dbg value. 
+; CHECK-LABEL: define i32 @foo(i32 %j) +; CHECK-NOT: alloca +; CHECK: call void @llvm.dbg.value(metadata i32 %j, {{.*}}) +; CHECK: call void @llvm.dbg.value(metadata i32 10, {{.*}}) +; CHECK: ret i32 %j + declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 declare void @llvm.dbg.value(metadata, metadata, metadata) #1 diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll index c89a3400c88..e713f7ea45c 100644 --- a/test/Transforms/InstCombine/debuginfo.ll +++ b/test/Transforms/InstCombine/debuginfo.ll @@ -1,48 +1,96 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -instcombine-lower-dbg-declare=0 -S \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOLOWER +; RUN: opt < %s -instcombine -instcombine-lower-dbg-declare=1 -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64--linux" + +%struct.TwoRegs = type { i64, i64 } declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone -declare i8* @foo(i8*, i32, i64, i64) nounwind +declare i8* @passthru_callee(i8*, i32, i64, i64) -define hidden i8* @foobar(i8* %__dest, i32 %__val, i64 %__len) nounwind inlinehint ssp !dbg !1 { +define i8* @passthru(i8* %a, i32 %b, i64 %c) !dbg !1 { entry: - %__dest.addr = alloca i8*, align 8 - %__val.addr = alloca i32, align 4 - %__len.addr = alloca i64, align 8 - store i8* %__dest, i8** %__dest.addr, align 8 -; CHECK-NOT: call void @llvm.dbg.declare -; CHECK: call void @llvm.dbg.value - call void @llvm.dbg.declare(metadata i8** %__dest.addr, metadata !0, metadata !DIExpression()), !dbg !16 - store i32 %__val, i32* %__val.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %__val.addr, metadata !7, metadata !DIExpression()), !dbg !18 - store i64 %__len, i64* %__len.addr, align 8 - call void @llvm.dbg.declare(metadata i64* %__len.addr, metadata !9, metadata 
!DIExpression()), !dbg !20 - %tmp = load i8*, i8** %__dest.addr, align 8, !dbg !21 - %tmp1 = load i32, i32* %__val.addr, align 4, !dbg !21 - %tmp2 = load i64, i64* %__len.addr, align 8, !dbg !21 - %tmp3 = load i8*, i8** %__dest.addr, align 8, !dbg !21 + %a.addr = alloca i8*, align 8 + %b.addr = alloca i32, align 4 + %c.addr = alloca i64, align 8 + store i8* %a, i8** %a.addr, align 8 + call void @llvm.dbg.declare(metadata i8** %a.addr, metadata !0, metadata !DIExpression()), !dbg !16 + store i32 %b, i32* %b.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !7, metadata !DIExpression()), !dbg !18 + store i64 %c, i64* %c.addr, align 8 + call void @llvm.dbg.declare(metadata i64* %c.addr, metadata !9, metadata !DIExpression()), !dbg !20 + %tmp = load i8*, i8** %a.addr, align 8, !dbg !21 + %tmp1 = load i32, i32* %b.addr, align 4, !dbg !21 + %tmp2 = load i64, i64* %c.addr, align 8, !dbg !21 + %tmp3 = load i8*, i8** %a.addr, align 8, !dbg !21 %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp3, i1 false), !dbg !21 - %call = call i8* @foo(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21 + %call = call i8* @passthru_callee(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21 ret i8* %call, !dbg !21 } +; CHECK-LABEL: define i8* @passthru(i8* %a, i32 %b, i64 %c) +; CHECK-NOT: alloca +; CHECK-NOT: store +; CHECK-NOT: call void @llvm.dbg.declare +; CHECK: call void @llvm.dbg.value(metadata i8* %a, {{.*}}) +; CHECK-NOT: store +; CHECK: call void @llvm.dbg.value(metadata i32 %b, {{.*}}) +; CHECK-NOT: store +; CHECK: call void @llvm.dbg.value(metadata i64 %c, {{.*}}) +; CHECK-NOT: store +; CHECK: call i8* @passthru_callee(i8* %a, i32 %b, i64 %c, i64 %{{.*}}) + +declare void @tworegs_callee(i64, i64) + +; Lowering dbg.declare in instcombine doesn't handle this case very well. 
+ +define void @tworegs(i64 %o.coerce0, i64 %o.coerce1) !dbg !31 { +entry: + %o = alloca %struct.TwoRegs, align 8 + %0 = bitcast %struct.TwoRegs* %o to { i64, i64 }* + %1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 0 + store i64 %o.coerce0, i64* %1, align 8 + %2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 1 + store i64 %o.coerce1, i64* %2, align 8 + call void @llvm.dbg.declare(metadata %struct.TwoRegs* %o, metadata !35, metadata !DIExpression()), !dbg !32 + %3 = bitcast %struct.TwoRegs* %o to { i64, i64 }*, !dbg !33 + %4 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 0, !dbg !33 + %5 = load i64, i64* %4, align 8, !dbg !33 + %6 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 1, !dbg !33 + %7 = load i64, i64* %6, align 8, !dbg !33 + call void @tworegs_callee(i64 %5, i64 %7), !dbg !33 + ret void, !dbg !33 +} + +; NOLOWER-LABEL: define void @tworegs(i64 %o.coerce0, i64 %o.coerce1) +; NOLOWER-NOT: alloca +; NOLOWER-NOT: store +; NOLOWER-NOT: call void @llvm.dbg.declare +; NOLOWER: call void @llvm.dbg.value(metadata i64 %o.coerce0, {{.*}}) +; NOLOWER-NOT: store +; NOLOWER: call void @llvm.dbg.value(metadata i64 %o.coerce1, {{.*}}) +; NOLOWER-NOT: store +; NOLOWER: call void @tworegs_callee(i64 %o.coerce0, i64 %o.coerce1) + + !llvm.dbg.cu = !{!3} !llvm.module.flags = !{!30} -!0 = !DILocalVariable(name: "__dest", line: 78, arg: 1, scope: !1, file: !2, type: !6) -!1 = distinct !DISubprogram(name: "foobar", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !3, scopeLine: 79, file: !27, scope: !2, type: !4, variables: !25) +!0 = !DILocalVariable(name: "a", line: 78, arg: 1, scope: !1, file: !2, type: !6) +!1 = distinct !DISubprogram(name: "passthru", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !3, scopeLine: 79, file: !27, scope: !2, type: !4, variables: !25) 
!2 = !DIFile(filename: "string.h", directory: "Game") !3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, emissionKind: FullDebug, file: !28, enums: !29, retainedTypes: !29) !4 = !DISubroutineType(types: !5) !5 = !{!6} !6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !3, baseType: null) -!7 = !DILocalVariable(name: "__val", line: 78, arg: 2, scope: !1, file: !2, type: !8) +!7 = !DILocalVariable(name: "b", line: 78, arg: 2, scope: !1, file: !2, type: !8) !8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!9 = !DILocalVariable(name: "__len", line: 78, arg: 3, scope: !1, file: !2, type: !10) -!10 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", line: 80, file: !27, scope: !3, baseType: !11) -!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "__darwin_size_t", line: 90, file: !27, scope: !3, baseType: !12) +!9 = !DILocalVariable(name: "c", line: 78, arg: 3, scope: !1, file: !2, type: !12) !12 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned) !16 = !DILocation(line: 78, column: 28, scope: !1) !18 = !DILocation(line: 78, column: 40, scope: !1) @@ -51,8 +99,19 @@ entry: !22 = distinct !DILexicalBlock(line: 80, column: 3, file: !27, scope: !23) !23 = distinct !DILexicalBlock(line: 79, column: 1, file: !27, scope: !1) !25 = !{!0, !7, !9} -!26 = !DIFile(filename: "bits.c", directory: "Game") !27 = !DIFile(filename: "string.h", directory: "Game") !28 = !DIFile(filename: "bits.c", directory: "Game") !29 = !{} !30 = !{i32 1, !"Debug Info Version", i32 3} + +!31 = distinct !DISubprogram(name: "tworegs", scope: !28, file: !28, line: 4, type: !4, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, unit: !3, variables: !34) +!32 = !DILocation(line: 4, column: 23, scope: !31) +!33 = !DILocation(line: 5, column: 3, scope: !31) +!34 = !{!35} 
+!35 = !DILocalVariable(name: "o", arg: 1, scope: !31, file: !28, line: 4, type: !36) +!36 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TwoRegs", file: !28, line: 1, size: 128, elements: !37) +!37 = !{!38, !39} +!38 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !36, file: !28, line: 1, baseType: !12, size: 64) +!39 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !36, file: !28, line: 1, baseType: !12, size: 64) +!40 = !DISubroutineType(types: !41) +!41 = !{!36} diff --git a/test/Transforms/Util/simplify-dbg-declare-load.ll b/test/Transforms/Util/simplify-dbg-declare-load.ll index 6a1a8eedd75..5aba8804c24 100644 --- a/test/Transforms/Util/simplify-dbg-declare-load.ll +++ b/test/Transforms/Util/simplify-dbg-declare-load.ll @@ -1,4 +1,5 @@ -; RUN: opt -instcombine -S < %s | FileCheck %s +; RUN: opt -instcombine -instcombine-lower-dbg-declare=1 -S < %s | FileCheck %s +; RUN: opt -instcombine -instcombine-lower-dbg-declare=0 -S < %s | FileCheck %s --check-prefix=DECLARE target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" @@ -26,6 +27,11 @@ idxend: ; preds = %top ret void } +; Keep the declare if we keep the alloca. +; DECLARE-LABEL: define void @julia_fastshortest_6256() +; DECLARE: %cp = alloca %foo, align 8 +; DECLARE: call void @llvm.dbg.declare(metadata %foo* %cp, + attributes #0 = { nounwind readnone } attributes #1 = { sspreq }