From: Jinsong Ji Date: Fri, 12 Jul 2019 01:59:42 +0000 (+0000) Subject: [MachinePipeliner] Fix order for nodes with Anti dependence in same cycle X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f85189abaa8bbad78b5099bdc4e71041870c4247;p=llvm [MachinePipeliner] Fix order for nodes with Anti dependence in same cycle Summary: Problem exposed in PowerPC functional testing. We did not consider Anti dependence for nodes in the same cycle, so we may end up generating bad machine code. e.g., the reduced test won't verify. *** Bad machine code: Using an undefined physical register *** - function: lame_encode_buffer_interleaved - basic block: %bb.4 (0x4bde4e12928) - instruction: %29:gprc = ADDZE %27:gprc, implicit-def dead $carry, implicit $carry - operand 3: implicit $carry Reviewers: bcahoon, kparzysz, hfinkel Subscribers: MaskRay, wuzish, nemanjai, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64192 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365859 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index 9c0c5cc5c70..54df522d371 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -3559,6 +3559,14 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, if (Pos < MoveUse) MoveUse = Pos; } + // We did not handle HW dependences in previous for loop, + // and we normally set Latency = 0 for Anti deps, + // so we may have nodes in same cycle with Anti dependent on HW regs. 
+ else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) { + OrderBeforeUse = true; + if ((MoveUse == 0) || (Pos < MoveUse)) + MoveUse = Pos; + } } for (auto &P : SU->Preds) { if (P.getSUnit() != *I) diff --git a/test/CodeGen/PowerPC/sms-grp-order.ll b/test/CodeGen/PowerPC/sms-grp-order.ll new file mode 100644 index 00000000000..a3e6de1c593 --- /dev/null +++ b/test/CodeGen/PowerPC/sms-grp-order.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\ +; RUN: -mcpu=pwr9 --ppc-enable-pipeliner | FileCheck %s + +define void @lame_encode_buffer_interleaved() local_unnamed_addr { +; CHECK-LABEL: lame_encode_buffer_interleaved: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz 3, 0(0) +; CHECK-NEXT: li 5, 1 +; CHECK-NEXT: sldi 5, 5, 62 +; CHECK-NEXT: lhz 4, 0(3) +; CHECK-NEXT: mtctr 5 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_1: # +; CHECK-NEXT: extsh 3, 3 +; CHECK-NEXT: extsh 4, 4 +; CHECK-NEXT: srawi 3, 3, 1 +; CHECK-NEXT: addze 3, 3 +; CHECK-NEXT: srawi 4, 4, 1 +; CHECK-NEXT: addze 4, 4 +; CHECK-NEXT: bdnz .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: sth 3, 0(0) +; CHECK-NEXT: sth 4, 0(3) +; CHECK-NEXT: blr + br label %1 + +1: ; preds = %1, %0 + %2 = phi i64 [ 0, %0 ], [ %13, %1 ] + %3 = load i16, i16* null, align 2 + %4 = load i16, i16* undef, align 2 + %5 = sext i16 %3 to i32 + %6 = sext i16 %4 to i32 + %7 = add nsw i32 0, %5 + %8 = add nsw i32 0, %6 + %9 = sdiv i32 %7, 2 + %10 = sdiv i32 %8, 2 + %11 = trunc i32 %9 to i16 + %12 = trunc i32 %10 to i16 + store i16 %11, i16* null, align 2 + store i16 %12, i16* undef, align 2 + %13 = add i64 %2, 4 + %14 = icmp eq i64 %13, 0 + br i1 %14, label %15, label %1 + +15: ; preds = %1 + ret void +}