From 71efcdf2ccf360d1ddb7cbf5eef2e6b22d9251b6 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 4 Oct 2019 19:51:40 +0000 Subject: [PATCH] [ScheduleDAG] When a node is cloned, add an edge between the nodes. InstrEmitter's virtual register handling assumes that clones are emitted after the cloned node. Make sure this assumption actually holds. Fixes a "Node emitted out of order - early" assertion on the testcase. This is probably a very rare case to actually hit in practice; even without the explicit edge, the scheduler will usually end up scheduling the nodes in the expected order due to other constraints. Differential Revision: https://reviews.llvm.org/D68068 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373782 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/ScheduleDAGRRList.cpp | 4 ++ .../CodeGen/Thumb/scheduler-clone-cpsr-def.ll | 41 +++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 test/CodeGen/Thumb/scheduler-clone-cpsr-def.ll diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 1598e4dfefd..ff806bdb822 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1188,6 +1188,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!Pred.isArtificial()) AddPredQueued(NewSU, Pred); + // Make sure the clone comes after the original. (InstrEmitter assumes + // this ordering.) + AddPredQueued(NewSU, SDep(SU, SDep::Artificial)); + // Only copy scheduled successors. Cut them from old node's successor // list and move them over. 
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; diff --git a/test/CodeGen/Thumb/scheduler-clone-cpsr-def.ll b/test/CodeGen/Thumb/scheduler-clone-cpsr-def.ll new file mode 100644 index 00000000000..31e54c43c1e --- /dev/null +++ b/test/CodeGen/Thumb/scheduler-clone-cpsr-def.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv6-linux-gnueabi < %s | FileCheck %s + +; After various DAGCombine optimizations, we end up with an sbcs with +; multiple uses of the cpsr def, and we therefore clone the subs/sbcs. +; Make sure this doesn't crash. +; +; The output here might change at some point in the future, and no +; longer clone the operations; if that happens, there probably isn't any +; straightforward way to fix the test. +define i64 @f(i64 %x2, i32 %z) { +; CHECK-LABEL: f: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: subs r3, r0, #1 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: sbcs r3, r2 +; CHECK-NEXT: mov r3, r2 +; CHECK-NEXT: adcs r3, r2 +; CHECK-NEXT: movs r4, #30 +; CHECK-NEXT: subs r5, r0, #1 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: sbcs r5, r2 +; CHECK-NEXT: adcs r4, r2 +; CHECK-NEXT: lsls r2, r1, #1 +; CHECK-NEXT: lsls r2, r4 +; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: eors r4, r3 +; CHECK-NEXT: lsrs r0, r4 +; CHECK-NEXT: orrs r0, r2 +; CHECK-NEXT: lsrs r1, r4 +; CHECK-NEXT: pop {r4, r5, r7, pc} + %x3 = add nsw i64 %x2, -1 + %x8 = icmp ne i64 %x2, 0 + %x9 = xor i1 %x8, true + %x10 = zext i1 %x9 to i64 + %x11 = lshr i64 %x2, %x10 + ret i64 %x11 +} -- 2.40.0