From: Adrian Prantl Date: Thu, 16 Mar 2017 21:14:09 +0000 (+0000) Subject: Salvage debug info from instructions about to be deleted X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b70598b6c68514ad181960cfccc90ce4db583264;p=llvm Salvage debug info from instructions about to be deleted [Reapplies r297971 and punting on finding a better API for findDbgValues()] This patch improves debug info quality in InstCombine by looking at values that are about to be deleted, checking whether there are any dbg.value instrinsics referring to them, and potentially encoding the semantics of the deleted instruction into the dbg.value's DIExpression. In the example in the testcase (which was extracted from XNU) there is a sequence of %4 = load %struct.entry*, %struct.entry** %next2, align 8, !dbg !41 %5 = bitcast %struct.entry* %4 to i8*, !dbg !42 %add.ptr4 = getelementptr inbounds i8, i8* %5, i64 -8, !dbg !43 %6 = bitcast i8* %add.ptr4 to %struct.entry*, !dbg !44 call void @llvm.dbg.value(metadata %struct.entry* %6, i64 0, metadata !20, metadata !21), !dbg 34 When these instructions are eliminated by instcombine one after another, we can still salvage the otherwise dead debug info: - Bitcasts have no effect, so have the dbg.value point to operand(0) - Loads can be expressed via a DW_OP_deref - Constant gep instructions can be replaced by DWARF expression arithmetic The API introduced by this patch is not specific to instcombine and can be useful in other places, too. rdar://problem/30725338 Differential Revision: https://reviews.llvm.org/D30919 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297994 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index aea0ff91980..2317747bcde 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -282,9 +282,12 @@ bool LowerDbgDeclare(Function &F); /// Finds the llvm.dbg.declare intrinsic corresponding to an alloca, if any. DbgDeclareInst *FindAllocaDbgDeclare(Value *V); -/// Finds the llvm.dbg.value intrinsics describing a value, if any. +/// Finds the llvm.dbg.value intrinsics describing a value. void findDbgValues(SmallVectorImpl &DbgValues, Value *V); +/// Constants for \p replaceDbgDeclare and friends. +enum { NoDeref = false, WithDeref = true }; + /// Replaces llvm.dbg.declare instruction when the address it describes /// is replaced with a new value. If Deref is true, an additional DW_OP_deref is /// prepended to the expression. If Offset is non-zero, a constant displacement @@ -310,6 +313,11 @@ bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, void replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, DIBuilder &Builder, int Offset = 0); +/// Assuming the instruction \p I is going to be deleted, attempt to salvage any +/// dbg.value intrinsics referring to \p I by rewriting its effect into a +/// DIExpression. +void salvageDebugInfo(Instruction &I); + /// Remove all instructions from a basic block other than it's terminator /// and any present EH pad instructions. unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB); diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 0d76cdf371e..0a3382e1903 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -28,6 +28,9 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/IR/DIBuilder.h" #define DEBUG_TYPE "instcombine" @@ -470,8 +473,9 @@ public: /// methods should return the value returned by this function. Instruction *eraseInstFromFunction(Instruction &I) { DEBUG(dbgs() << "IC: ERASE " << I << '\n'); - assert(I.use_empty() && "Cannot erase instruction that is used!"); + salvageDebugInfo(I); + // Make sure that we reprocess all operands now that we reduced their // use counts. if (I.getNumOperands() < 8) { diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index acbc131118f..46996323b59 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1259,36 +1259,32 @@ void llvm::findDbgValues(SmallVectorImpl &DbgValues, Value *V) { DbgValues.push_back(DVI); } -static void DIExprAddDeref(SmallVectorImpl &Expr) { - Expr.push_back(dwarf::DW_OP_deref); -} - -static void DIExprAddOffset(SmallVectorImpl &Expr, int Offset) { +static void appendOffset(SmallVectorImpl &Ops, int64_t Offset) { if (Offset > 0) { - Expr.push_back(dwarf::DW_OP_plus); - Expr.push_back(Offset); + Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(Offset); } else if (Offset < 0) { - Expr.push_back(dwarf::DW_OP_minus); - Expr.push_back(-Offset); + Ops.push_back(dwarf::DW_OP_minus); + Ops.push_back(-Offset); } } -static DIExpression *BuildReplacementDIExpr(DIBuilder &Builder, - DIExpression *DIExpr, bool Deref, - int Offset) { +/// Prepend \p DIExpr with a deref and offset operation. +static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr, + bool Deref, int64_t Offset) { if (!Deref && !Offset) return DIExpr; // Create a copy of the original DIDescriptor for user variable, prepending // "deref" operation to a list of address elements, as new llvm.dbg.declare // will take a value storing address of the memory for variable, not // alloca itself. - SmallVector NewDIExpr; + SmallVector Ops; if (Deref) - DIExprAddDeref(NewDIExpr); - DIExprAddOffset(NewDIExpr, Offset); + Ops.push_back(dwarf::DW_OP_deref); + appendOffset(Ops, Offset); if (DIExpr) - NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); - return Builder.createExpression(NewDIExpr); + Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); + return Builder.createExpression(Ops); } bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, @@ -1302,7 +1298,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - DIExpr = BuildReplacementDIExpr(Builder, DIExpr, Deref, Offset); + DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset); // Insert llvm.dbg.declare immediately after the original alloca, and remove // old llvm.dbg.declare. @@ -1334,11 +1330,11 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, // Insert the offset immediately after the first deref. // We could just change the offset argument of dbg.value, but it's unsigned... if (Offset) { - SmallVector NewDIExpr; - DIExprAddDeref(NewDIExpr); - DIExprAddOffset(NewDIExpr, Offset); - NewDIExpr.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); - DIExpr = Builder.createExpression(NewDIExpr); + SmallVector Ops; + Ops.push_back(dwarf::DW_OP_deref); + appendOffset(Ops, Offset); + Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); + DIExpr = Builder.createExpression(Ops); } Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr, @@ -1357,6 +1353,53 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, } } +void llvm::salvageDebugInfo(Instruction &I) { + SmallVector DbgValues; + auto &M = *I.getModule(); + + auto MDWrap = [&](Value *V) { + return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V)); + }; + + if (auto *BitCast = dyn_cast(&I)) { + findDbgValues(DbgValues, BitCast); + for (auto *DVI : DbgValues) { + // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value + // to use the cast's source. + DVI->setOperand(0, MDWrap(I.getOperand(0))); + DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + } + } else if (auto *GEP = dyn_cast(&I)) { + findDbgValues(DbgValues, GEP); + for (auto *DVI : DbgValues) { + unsigned BitWidth = + M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); + APInt Offset(BitWidth, 0); + // Rewrite a constant GEP into a DIExpression. + if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { + auto *DIExpr = DVI->getExpression(); + DIBuilder DIB(M, /*AllowUnresolved*/ false); + // GEP offsets are i32 and thus alwaus fit into an int64_t. + DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue()); + DVI->setOperand(0, MDWrap(I.getOperand(0))); + DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); + DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + } + } + } else if (auto *Load = dyn_cast(&I)) { + findDbgValues(DbgValues, Load); + for (auto *DVI : DbgValues) { + // Rewrite the load into DW_OP_deref. + auto *DIExpr = DVI->getExpression(); + DIBuilder DIB(M, /*AllowUnresolved*/ false); + DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0); + DVI->setOperand(0, MDWrap(I.getOperand(0))); + DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); + DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + } + } +} + unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { unsigned NumDeadInst = 0; // Delete the instructions backwards, as it has a reduced likelihood of diff --git a/test/Transforms/InstCombine/debuginfo-dce.ll b/test/Transforms/InstCombine/debuginfo-dce.ll new file mode 100644 index 00000000000..e23aef7334d --- /dev/null +++ b/test/Transforms/InstCombine/debuginfo-dce.ll @@ -0,0 +1,106 @@ +; RUN: opt -instcombine %s -S -o - | FileCheck %s +; Verify that the eliminated instructions (bitcast, gep, load) are salvaged into +; a DIExpression. +; +; Originally created from the following C source and then heavily isolated/reduced. +; +; struct entry { +; struct entry *next; +; }; +; void scan(struct entry *queue, struct entry *end) +; { +; struct entry *entry; +; for (entry = (struct entry *)((char *)(queue->next) - 8); +; &entry->next == end; +; entry = (struct entry *)((char *)(entry->next) - 8)) { +; } +; } + +; ModuleID = '' +source_filename = "test.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +%struct.entry = type { %struct.entry* } + +; Function Attrs: nounwind ssp uwtable +define void @salvage_load(%struct.entry** %queue) local_unnamed_addr #0 !dbg !14 { +entry: + %im_not_dead = alloca %struct.entry* + %0 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19 + %1 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19 + call void @llvm.dbg.value(metadata %struct.entry* %1, i64 0, metadata !18, metadata !20), !dbg !19 +; CHECK: define void @salvage_load +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry** %queue, i64 0, +; CHECK-SAME: metadata ![[LOAD_EXPR:[0-9]+]]) + store %struct.entry* %1, %struct.entry** %im_not_dead, align 8 + ret void, !dbg !21 +} + +; Function Attrs: nounwind ssp uwtable +define void @salvage_bitcast(%struct.entry* %queue) local_unnamed_addr #0 !dbg !14 { +entry: + %im_not_dead = alloca i8* + %0 = bitcast %struct.entry* %queue to i8*, !dbg !19 + %1 = bitcast %struct.entry* %queue to i8*, !dbg !19 + call void @llvm.dbg.value(metadata i8* %1, i64 0, metadata !18, metadata !20), !dbg !19 +; CHECK: define void @salvage_bitcast +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue, i64 0, +; CHECK-SAME: metadata ![[BITCAST_EXPR:[0-9]+]]) + store i8* %1, i8** %im_not_dead, align 8 + ret void, !dbg !21 +} + +; Function Attrs: nounwind ssp uwtable +define void @salvage_gep(%struct.entry* %queue, %struct.entry* %end) local_unnamed_addr #0 !dbg !14 { +entry: + %im_not_dead = alloca %struct.entry** + %0 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !19 + %1 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !19 + call void @llvm.dbg.value(metadata %struct.entry** %1, i64 0, metadata !18, metadata !20), !dbg !19 +; CHECK: define void @salvage_gep +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue, i64 0, +; CHECK-SAME: metadata ![[GEP_EXPR:[0-9]+]]) + store %struct.entry** %1, %struct.entry*** %im_not_dead, align 8 + ret void, !dbg !21 +} + +; CHECK: ![[LOAD_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_plus, 0) +; CHECK: ![[BITCAST_EXPR]] = !DIExpression(DW_OP_plus, 0) +; CHECK: ![[GEP_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_plus, 0) + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 + +attributes #0 = { nounwind ssp uwtable } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!10, !11, !12} +!llvm.ident = !{!13} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{} +!3 = !{!4, !8} +!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64) +!5 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "entry", file: !1, line: 1, size: 64, elements: !6) +!6 = !{!7} +!7 = !DIDerivedType(tag: DW_TAG_member, name: "next", scope: !5, file: !1, line: 2, baseType: !4, size: 64) +!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) +!9 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!10 = !{i32 2, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"PIC Level", i32 2} +!13 = !{!"clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)"} +!14 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !17) +!15 = !DISubroutineType(types: !16) +!16 = !{null, !4, !4} +!17 = !{!18} +!18 = !DILocalVariable(name: "entry", scope: !14, file: !1, line: 6, type: !4) +!19 = !DILocation(line: 6, column: 17, scope: !14) +!20 = !DIExpression(DW_OP_plus, 0) +!21 = !DILocation(line: 11, column: 1, scope: !14)