From 64ecbb6debdc7023f37f7f0f0b53b98b969a5674 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 11 Mar 2019 13:19:46 +0000 Subject: [PATCH] [CGP] Limit distance between overflow math and cmp Inserting an overflowing arithmetic intrinsic can increase register pressure by producing two values at a point where only one is needed, while the second use may be several blocks away. This increase in pressure is likely to be more detrimental to performance than rematerialising one of the original instructions. So, check that the arithmetic and compare instructions are no further apart than their immediate successor/predecessor. Differential Revision: https://reviews.llvm.org/D59024 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355823 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CodeGenPrepare.cpp | 11 ++++ .../CodeGenPrepare/ARM/overflow-intrinsics.ll | 56 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 80335f55c77..af1b65253f8 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1180,6 +1180,17 @@ static bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp, bool MathDominates = DT.dominates(BO, Cmp); if (!MathDominates && !DT.dominates(Cmp, BO)) return false; + + // Check that the insertion doesn't create a value that is live across more + // than two blocks, so to minimise the increase in register pressure. + if (BO->getParent() != Cmp->getParent()) { + BasicBlock *Dominator = MathDominates ? BO->getParent() : Cmp->getParent(); + BasicBlock *Dominated = MathDominates ? Cmp->getParent() : BO->getParent(); + auto Successors = successors(Dominator); + if (llvm::find(Successors, Dominated) == Successors.end()) + return false; + } + InsertPt = MathDominates ? 
cast(BO) : cast(Cmp); } diff --git a/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll b/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll new file mode 100644 index 00000000000..3fbc2133141 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll @@ -0,0 +1,56 @@ +; RUN: opt -codegenprepare -S < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.main-arm-none-eabi" + +; CHECK-LABEL: uadd_overflow_too_far_cmp_dom +; CHECK-NOT: with.overflow.i32 +define i32 @uadd_overflow_too_far_cmp_dom(i32 %arg0) { +entry: + %cmp = icmp ne i32 %arg0, 0 + br i1 %cmp, label %if.else, label %if.then + +if.then: + call void @foo() + br label %exit + +if.else: + call void @bar() + br label %if.end + +if.end: + %dec = add nsw i32 %arg0, -1 + br label %exit + +exit: + %res = phi i32 [ %arg0, %if.then ], [ %dec, %if.end ] + ret i32 %res +} + +; CHECK-LABEL: uadd_overflow_too_far_math_dom +; CHECK-NOT: with.overflow.i32 +define i32 @uadd_overflow_too_far_math_dom(i32 %arg0, i32 %arg1) { +entry: + %dec = add nsw i32 %arg0, -1 + %cmp = icmp ugt i32 %arg0, 1 + br i1 %cmp, label %if.else, label %if.then + +if.then: + call void @foo() + br label %if.end + +if.else: + call void @bar() + br label %if.end + +if.end: + %cmp.i.i = icmp ne i32 %arg0, 0 + %tobool = zext i1 %cmp.i.i to i32 + br label %exit + +exit: + ret i32 %tobool +} + +declare void @foo() +declare void @bar() -- 2.50.1