From: Robert Lougher Date: Thu, 23 May 2019 18:15:12 +0000 (+0000) Subject: Resubmit r360436 "[X86] Avoid SFB - Fix inconsistent codegen with/without debug info" X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3891f7294d605753b7370894c40118d6986a53bd;p=llvm Resubmit r360436 "[X86] Avoid SFB - Fix inconsistent codegen with/without debug info" Fixes https://bugs.llvm.org/show_bug.cgi?id=40969 The functions findPotentiallyBlockedCopies and buildCopy are currently not accounting for the presence of debug instructions. In the former this results in the optimization not being trigerred, and in the latter results in inconsistent codegen. This patch enables the optimization to be performed in a debug build and ensures the codegen is consistent with non-debug builds. Patch by Chris Dawson. Differential Revision: https://reviews.llvm.org/D61680 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361527 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp index 3ac0b1ae514..a9d807b733b 100644 --- a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -406,7 +406,10 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode, // If the load and store are consecutive, use the loadInst location to // reduce register pressure. MachineInstr *StInst = StoreInst; - if (StoreInst->getPrevNode() == LoadInst) + auto PrevInstrIt = skipDebugInstructionsBackward( + std::prev(MachineBasicBlock::instr_iterator(StoreInst)), + MBB->instr_begin()); + if (PrevInstrIt.getNodePtr() == LoadInst) StInst = LoadInst; MachineInstr *NewStore = BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode)) @@ -491,19 +494,22 @@ void X86AvoidSFBPass::buildCopies(int Size, MachineInstr *LoadInst, static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst) { MachineOperand &LoadBase = getBaseOperand(LoadInst); MachineOperand &StoreBase = getBaseOperand(StoreInst); + auto StorePrevNonDbgInstr = skipDebugInstructionsBackward( + std::prev(MachineBasicBlock::instr_iterator(StoreInst)), + LoadInst->getParent()->instr_begin()).getNodePtr(); if (LoadBase.isReg()) { MachineInstr *LastLoad = LoadInst->getPrevNode(); // If the original load and store to xmm/ymm were consecutive // then the partial copies were also created in // a consecutive order to reduce register pressure, // and the location of the last load is before the last store. - if (StoreInst->getPrevNode() == LoadInst) + if (StorePrevNonDbgInstr == LoadInst) LastLoad = LoadInst->getPrevNode()->getPrevNode(); getBaseOperand(LastLoad).setIsKill(LoadBase.isKill()); } if (StoreBase.isReg()) { MachineInstr *StInst = StoreInst; - if (StoreInst->getPrevNode() == LoadInst) + if (StorePrevNonDbgInstr == LoadInst) StInst = LoadInst; getBaseOperand(StInst->getPrevNode()).setIsKill(StoreBase.isKill()); } @@ -530,7 +536,7 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) { if (!isPotentialBlockedMemCpyLd(MI.getOpcode())) continue; int DefVR = MI.getOperand(0).getReg(); - if (!MRI->hasOneUse(DefVR)) + if (!MRI->hasOneNonDBGUse(DefVR)) continue; for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end(); UI != UE;) { diff --git a/test/CodeGen/X86/avoid-sfb-g-no-change.mir b/test/CodeGen/X86/avoid-sfb-g-no-change.mir new file mode 100644 index 00000000000..82cb9a786c2 --- /dev/null +++ b/test/CodeGen/X86/avoid-sfb-g-no-change.mir @@ -0,0 +1,222 @@ +# RUN: llc %s -run-pass x86-avoid-SFB -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s -check-prefixes DEBUG-LABEL,CHECK +# RUN: llc %s -run-pass x86-avoid-SFB -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s -check-prefixes NODEBUG-LABEL,CHECK +# +# This was generated from: +# +# using alpha = float __attribute__((ext_vector_type(4))); +# +# void bravo(alpha * __restrict__ p1, alpha * __restrict__ p2) { +# char *p3 = (char *)p1; +# *p3 = 0; +# alpha t = *p1; +# *p2 = t; +# } +# +# Using the command line: +# clang -g -c 1.cpp -O2 -S -emit-llvm -fno-strict-aliasing --target=x86_64-unknown-unknown -o test.ll +# llc -stop-before=x86-avoid-SFB test.ll -o before.mir + +--- | + ; ModuleID = 'test.ll' + source_filename = "1.cpp" + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-unknown" + + ; Function Attrs: norecurse nounwind uwtable + define dso_local void @debug(<4 x float>* noalias nocapture %p1, <4 x float>* noalias nocapture %p2) local_unnamed_addr #0 !dbg !10 { + entry: + call void @llvm.dbg.value(metadata <4 x float>* %p1, metadata !21, metadata !DIExpression()), !dbg !25 + call void @llvm.dbg.value(metadata <4 x float>* %p2, metadata !22, metadata !DIExpression()), !dbg !25 + %0 = bitcast <4 x float>* %p1 to i8*, !dbg !26 + call void @llvm.dbg.value(metadata i8* %0, metadata !23, metadata !DIExpression()), !dbg !25 + store i8 0, i8* %0, align 1, !dbg !27 + %1 = load <4 x float>, <4 x float>* %p1, align 16, !dbg !28 + call void @llvm.dbg.value(metadata <4 x float> %1, metadata !24, metadata !DIExpression()), !dbg !25 + store <4 x float> %1, <4 x float>* %p2, align 16, !dbg !29 + ret void, !dbg !30 + } + + ; Function Attrs: norecurse nounwind uwtable + define dso_local void @nodebug(<4 x float>* noalias nocapture %p1, <4 x float>* noalias nocapture %p2) local_unnamed_addr #0 { + entry: + %0 = bitcast <4 x float>* %p1 to i8* + store i8 0, i8* %0, align 1 + %1 = load <4 x float>, <4 x float>* %p1, align 16 + store <4 x float> %1, <4 x float>* %p2, align 16 + ret void + } + + ; Function Attrs: nounwind readnone speculatable + declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone speculatable } + attributes #2 = { nounwind } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!6, !7, !8} + !llvm.ident = !{!9} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git 9afc4764dd24bd2f23c44e51ad33f8e58234a8b6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None) + !1 = !DIFile(filename: "1.cpp", directory: "C:\5CUsers\5Cgbdawsoc\5CDocuments\5Cllvm\5Cbg40969") + !2 = !{} + !3 = !{!4} + !4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64) + !5 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) + !6 = !{i32 2, !"Dwarf Version", i32 4} + !7 = !{i32 2, !"Debug Info Version", i32 3} + !8 = !{i32 1, !"wchar_size", i32 4} + !9 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git 9afc4764dd24bd2f23c44e51ad33f8e58234a8b6)"} + !10 = distinct !DISubprogram(name: "bravo", linkageName: "_Z5bravoPDv4_fS0_", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20) + !11 = !DISubroutineType(types: !12) + !12 = !{null, !13, !13} + !13 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !14) + !14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64) + !15 = !DIDerivedType(tag: DW_TAG_typedef, name: "alpha", file: !1, line: 2, baseType: !16) + !16 = !DICompositeType(tag: DW_TAG_array_type, baseType: !17, size: 128, flags: DIFlagVector, elements: !18) + !17 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) + !18 = !{!19} + !19 = !DISubrange(count: 4) + !20 = !{!21, !22, !23, !24} + !21 = !DILocalVariable(name: "p1", arg: 1, scope: !10, file: !1, line: 4, type: !13) + !22 = !DILocalVariable(name: "p2", arg: 2, scope: !10, file: !1, line: 4, type: !13) + !23 = !DILocalVariable(name: "p3", scope: !10, file: !1, line: 5, type: !4) + !24 = !DILocalVariable(name: "t", scope: !10, file: !1, line: 7, type: !15) + !25 = !DILocation(line: 0, scope: !10) + !26 = !DILocation(line: 5, column: 14, scope: !10) + !27 = !DILocation(line: 6, column: 7, scope: !10) + !28 = !DILocation(line: 7, column: 13, scope: !10) + !29 = !DILocation(line: 8, column: 7, scope: !10) + !30 = !DILocation(line: 9, column: 1, scope: !10) + +... +--- +name: debug +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gr64, preferred-register: '' } + - { id: 1, class: gr64, preferred-register: '' } + - { id: 2, class: vr128, preferred-register: '' } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $rdi, $rsi + + DBG_VALUE $rdi, $noreg, !21, !DIExpression(), debug-location !25 + DBG_VALUE $rsi, $noreg, !22, !DIExpression(), debug-location !25 + %1:gr64 = COPY $rsi + DBG_VALUE %1, $noreg, !22, !DIExpression(), debug-location !25 + %0:gr64 = COPY $rdi + DBG_VALUE %0, $noreg, !21, !DIExpression(), debug-location !25 + DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !25 + MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !27 :: (store 1 into %ir.0) + %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !28 :: (load 16 from %ir.p1) + DBG_VALUE %2, $noreg, !24, !DIExpression(), debug-location !25 + MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !29 :: (store 16 into %ir.p2) + RET 0, debug-location !30 + +... +--- +name: nodebug +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gr64, preferred-register: '' } + - { id: 1, class: gr64, preferred-register: '' } + - { id: 2, class: vr128, preferred-register: '' } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $rdi, $rsi + + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + MOV8mi %0, 1, $noreg, 0, $noreg, 0 :: (store 1 into %ir.0) + %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1) + MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2 :: (store 16 into %ir.p2) + RET 0 + + ; DEBUG-LABEL: name: debug + ; NODEBUG-LABEL: name: nodebug + ; CHECK: %1:gr64 = COPY + ; CHECK: %0:gr64 = COPY + ; CHECK: MOV8mi + ; CHECK: %3:gr8 = MOV8rm + ; CHECK: MOV8mr + ; CHECK: %4:gr64 = MOV64rm + ; CHECK: MOV64mr + ; CHECK: %5:gr32 = MOV32rm + ; CHECK: MOV32mr + ; CHECK: %6:gr16 = MOV16rm + ; CHECK: MOV16mr + ; CHECK: %7:gr8 = MOV8rm + ; CHECK: MOV8mr + ; CHECK: RET 0 + ; DEBUG-LABEL: name: nodebug +...