From b933e7ecf99912556f50f230b6721cd4a65c78e4 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 2 Jul 2019 06:04:46 +0000 Subject: [PATCH] [TailDuplicator] Fix copy instruction emitting into the wrong block. The code for duplicating instructions could sometimes try to emit copies intended to deal with unconstrainable register classes to the tail block of the original instruction, rather than before the newly cloned instruction in the predecessor block. This was exposed by GlobalISel on arm64. Differential Revision: https://reviews.llvm.org/D64049 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364888 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TailDuplicator.cpp | 2 +- test/CodeGen/AArch64/taildup-inst-dup-loc.mir | 125 ++++++++++++++++++ 2 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AArch64/taildup-inst-dup-loc.mir diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index 4d3e02e3174..a0590a8a6cc 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -434,7 +434,7 @@ void TailDuplicator::duplicateInstruction( if (NewRC == nullptr) NewRC = OrigRC; unsigned NewReg = MRI->createVirtualRegister(NewRC); - BuildMI(*PredBB, MI, MI->getDebugLoc(), + BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(), TII->get(TargetOpcode::COPY), NewReg) .addReg(VI->second.Reg, 0, VI->second.SubReg); LocalVRMap.erase(VI); diff --git a/test/CodeGen/AArch64/taildup-inst-dup-loc.mir b/test/CodeGen/AArch64/taildup-inst-dup-loc.mir new file mode 100644 index 00000000000..5954801d992 --- /dev/null +++ b/test/CodeGen/AArch64/taildup-inst-dup-loc.mir @@ -0,0 +1,125 @@ +# RUN: llc -mtriple aarch64 -run-pass=early-tailduplication -verify-machineinstrs %s -o - 2>&1 | FileCheck %s +--- +name: pluto +tracksRegLiveness: true +body: | + ; This test checks that the COPY3 and COPY4 copies are correctly placed in the bb.5 block, + ; instead of crashing. + + ; CHECK-LABEL: name: pluto + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $x0 + ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:gpr32common = IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:gpr64 = IMPLICIT_DEF + ; CHECK: [[DEF3:%[0-9]+]]:gpr64common = IMPLICIT_DEF + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0 + ; CHECK: TBNZW [[DEF]], 0, %bb.1 + ; CHECK: B %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[DEF3]], 0 :: (load 8 from `i64* undef`) + ; CHECK: B %bb.9 + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; CHECK: $wzr = SUBSWri [[DEF1]], 19, 0, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: TBNZW [[CSINCWr]], 0, %bb.3 + ; CHECK: B %bb.4 + ; CHECK: bb.3: + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: [[SCVTFUXDri:%[0-9]+]]:fpr64 = SCVTFUXDri [[DEF2]] + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[SCVTFUXDri]] + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[SCVTFUXDri]] + ; CHECK: B %bb.9 + ; CHECK: bb.4: + ; CHECK: successors: %bb.5(0x40000000), %bb.8(0x40000000) + ; CHECK: TBNZW [[DEF]], 0, %bb.5 + ; CHECK: B %bb.8 + ; CHECK: bb.5: + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[DEF2]] + ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]] + ; CHECK: [[COPY4:%[0-9]+]]:fpr64 = COPY [[DEF2]] + ; CHECK: B %bb.9 + ; CHECK: bb.8: + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: bb.9: + ; CHECK: [[PHI:%[0-9]+]]:gpr64 = PHI [[LDRXui]], %bb.1, [[FMOVD0_]], %bb.8, [[COPY]], %bb.3, [[COPY3]], %bb.5 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $d0 = COPY [[PHI]] + ; CHECK: BL @pluto, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $d0, implicit-def $d0 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $w0 = COPY [[MOVi32imm]] + ; CHECK: RET_ReallyLR implicit $w0 + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + liveins: $x0 + + %1:gpr32 = IMPLICIT_DEF + %2:gpr32common = IMPLICIT_DEF + %5:gpr64 = IMPLICIT_DEF + %9:gpr64common = IMPLICIT_DEF + %13:gpr32 = MOVi32imm 1 + %14:fpr64 = FMOVD0 + TBNZW %1, 0, %bb.2 + B %bb.3 + + bb.2: + successors: %bb.8(0x80000000) + + %8:gpr64 = LDRXui %9, 0 :: (load 8 from `i64* undef`) + B %bb.8 + + bb.3: + successors: %bb.4(0x40000000), %bb.5(0x40000000) + + $wzr = SUBSWri %2, 19, 0, implicit-def $nzcv + %15:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + TBNZW %15, 0, %bb.4 + B %bb.5 + + bb.4: + successors: %bb.7(0x80000000) + + %6:fpr64 = SCVTFUXDri %5 + B %bb.7 + + bb.5: + successors: %bb.6(0x40000000), %bb.9(0x40000000) + + TBNZW %1, 0, %bb.6 + B %bb.9 + + bb.6: + successors: %bb.7(0x80000000) + + + bb.7: + successors: %bb.8(0x80000000) + + %7:fpr64 = PHI %6, %bb.4, %5, %bb.6 + + bb.8: + successors: %bb.10(0x80000000) + + %10:gpr64 = PHI %8, %bb.2, %7, %bb.7 + B %bb.10 + + bb.9: + successors: %bb.10(0x80000000) + + + bb.10: + %11:gpr64 = PHI %10, %bb.8, %14, %bb.9 + ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + $d0 = COPY %11 + BL @pluto, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $d0, implicit-def $d0 + ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + $w0 = COPY %13 + RET_ReallyLR implicit $w0 + +... -- 2.50.1