From eb181e82b8bf9eb57cdacc2ea16695c828e3c18f Mon Sep 17 00:00:00 2001
From: James Molloy <james.molloy@arm.com>
Date: Mon, 4 Jul 2016 16:35:41 +0000
Subject: [PATCH] [Thumb] Reapply r272251 with a fix for PR28348

We were using DAG->getConstant instead of DAG->getTargetConstant. This meant that we could inadvertently increase the use count of a constant if stars aligned, which it did in this testcase. Increasing the use count of the constant could cause ISel to fall over (because DAGToDAG lowering assumed the constant had only one use!)

Original commit message:
  [Thumb] Select a BIC instead of AND if the immediate can be encoded more optimally negated

  If an immediate is only used in an AND node, it is possible that the immediate can be more optimally materialized when negated. If this is the case, we can negate the immediate and use a BIC instead;

    int i(int a) {
      return a & 0xfffffeec;
    }

  Used to produce:
      ldr r1, [CONSTPOOL]
      ands r0, r1
    CONSTPOOL: 0xfffffeec

  And now produces:
      movs    r1, #255
      adds    r1, #20  ; Less costly immediate generation
      bics    r0, r1

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274510 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelDAGToDAG.cpp | 41 +++++++++++++++++++++++++++++-
 test/CodeGen/Thumb/bic_imm.ll      | 12 +++++++++
 test/CodeGen/Thumb2/bicbfi.ll      | 17 +++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/Thumb/bic_imm.ll
 create mode 100644 test/CodeGen/Thumb2/bicbfi.ll
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a7871ea8db5..b5605c08dc4 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2820,6 +2820,45 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     if (tryV6T2BitfieldExtractOp(N, false))
       return;
 
+    // If an immediate is used in an AND node, it is possible that the immediate
+    // can be more optimally materialized when negated. If this is the case we
+    // can negate the immediate and use a BIC instead.
+    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
+      uint32_t Imm = (uint32_t) N1C->getZExtValue();
+
+      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
+      // immediate can be negated and fit in the immediate operand of
+      // a t2BIC, don't do any manual transform here as this can be
+      // handled by the generic ISel machinery.
+      bool PreferImmediateEncoding =
+          Subtarget->hasThumb2() && !is_t2_so_imm(Imm) && is_t2_so_imm_not(Imm);
+      if (!PreferImmediateEncoding &&
+          ConstantMaterializationCost(Imm) >
+              ConstantMaterializationCost(~Imm)) {
+        // The current immediate costs more to materialize than a negated
+        // immediate, so negate the immediate and use a BIC.
+        SDValue NewImm =
+            CurDAG->getTargetConstant(~N1C->getZExtValue(), dl, MVT::i32);
+        CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
+
+        if (!Subtarget->hasThumb2()) {
+          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
+                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
+                           CurDAG->getRegister(0, MVT::i32)};
+          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
+          return;
+        } else {
+          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
+                           CurDAG->getRegister(0, MVT::i32),
+                           CurDAG->getRegister(0, MVT::i32)};
+          ReplaceNode(N,
+                      CurDAG->getMachineNode(ARM::t2BICri, dl, MVT::i32, Ops));
+          return;
+        }
+      }
+    }
+
     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
     // are entirely contributed by c2 and lower 16-bits are entirely contributed
@@ -2834,7 +2873,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     if (!Opc)
       break;
     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
-    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+    N1C = dyn_cast<ConstantSDNode>(N1);
     if (!N1C)
       break;
     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
diff --git a/test/CodeGen/Thumb/bic_imm.ll b/test/CodeGen/Thumb/bic_imm.ll
new file mode 100644
index 00000000000..078c321b781
--- /dev/null
+++ b/test/CodeGen/Thumb/bic_imm.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-m0 -verify-machineinstrs | FileCheck --check-prefix CHECK-T1 %s
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-m3 -verify-machineinstrs | FileCheck --check-prefix CHECK-T2 %s
+
+; CHECK-T1-LABEL: @i
+; CHECK-T2-LABEL: @i
+; CHECK-T1: bics r0, #275
+; CHECK-T2: bic r0, r0, #275
+define i32 @i(i32 %a) {
+entry:
+  %and = and i32 %a, -276
+  ret i32 %and
+}
diff --git a/test/CodeGen/Thumb2/bicbfi.ll b/test/CodeGen/Thumb2/bicbfi.ll
new file mode 100644
index 00000000000..fcdb1225db5
--- /dev/null
+++ b/test/CodeGen/Thumb2/bicbfi.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7--linux-gnueabihf"
+
+; CHECK-LABEL: f:
+; CHECK: bic
+define void @f(i32* nocapture %b, i32* nocapture %c, i32 %a) {
+  %1 = and i32 %a, -4096
+  store i32 %1, i32* %c, align 4
+  %2 = and i32 %a, 4095
+  %3 = or i32 %2, 4096
+  %4 = load i32, i32* %b, align 4
+  %5 = add nsw i32 %4, %3
+  store i32 %5, i32* %b, align 4
+  ret void
+}
\ No newline at end of file
-- 
2.50.0