From 43bd69c68c109e89158d4d3cafc6be4a4f56e299 Mon Sep 17 00:00:00 2001
From: Joerg Sonnenberger <joerg@bec.de>
Date: Thu, 28 Sep 2017 14:13:54 +0000
Subject: [PATCH] Merging r311921:
 ------------------------------------------------------------------------
 r311921 | joerg | 2017-08-28 22:20:47 +0200 (Mon, 28 Aug 2017) | 16 lines

Fix ARMv4 support

ARMv4 doesn't support the "BX" instruction, which has been introduced
with ARMv4t. Adjust the call lowering and tail call implementation
accordingly.

Further changes are necessary to ensure that presence of the v4t feature
is correctly set. Most importantly, the "generic" CPU for thumb-*
triples should include ARMv4t, since thumb mode without thumb support
would naturally be pointless.

Add a couple of asserts to ensure thumb instructions are not emitted
without CPU support.

Differential Revision: https://reviews.llvm.org/D37030

------------------------------------------------------------------------


git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@314417 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMAsmPrinter.cpp                |  2 ++
 lib/Target/ARM/ARMCallLowering.cpp              |  4 +++-
 lib/Target/ARM/ARMExpandPseudoInsts.cpp         |  5 ++++-
 lib/Target/ARM/ARMFastISel.cpp                  |  5 +++--
 lib/Target/ARM/ARMFrameLowering.cpp             |  5 ++---
 lib/Target/ARM/ARMInstrInfo.td                  |  8 +++++++-
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp        |  1 +
 lib/Target/ARM/ARMSubtarget.h                   | 11 +++++++++++
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp |  4 ++--
 test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll |  4 ++--
 test/CodeGen/ARM/armv4.ll                       | 17 ++++++++++++++++-
 test/CodeGen/ARM/debug-segmented-stacks.ll      |  4 ++--
 test/CodeGen/ARM/segmented-stacks-dynamic.ll    |  8 ++++----
 test/CodeGen/ARM/segmented-stacks.ll            |  4 ++--
 14 files changed, 61 insertions(+), 21 deletions(-)

diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 582153daebd..b24d3420d1d 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1276,6 +1276,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       // Add 's' bit operand (always reg0 for this)
       .addReg(0));
 
+    assert(Subtarget->hasV4TOps());
     EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
       .addReg(MI->getOperand(0).getReg()));
     return;
@@ -1896,6 +1897,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addImm(ARMCC::AL)
       .addReg(0));
 
+    assert(Subtarget->hasV4TOps());
     EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
       .addReg(ScratchReg)
       // Predicate.
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 051827a6a6a..a1a31e1e7fa 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -251,7 +251,9 @@ bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                   const Value *Val, unsigned VReg) const {
   assert(!Val == !VReg && "Return value without a vreg");
 
-  auto Ret = MIRBuilder.buildInstrNoInsert(ARM::BX_RET).add(predOps(ARMCC::AL));
+  auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>();
+  unsigned Opcode = ST.getReturnOpcode();
+  auto Ret = MIRBuilder.buildInstrNoInsert(Opcode).add(predOps(ARMCC::AL));
 
   if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
     return false;
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 46d8f0dba69..376727729d8 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1030,8 +1030,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
         if (STI->isThumb())
           MIB.add(predOps(ARMCC::AL));
       } else if (RetOpcode == ARM::TCRETURNri) {
+        unsigned Opcode =
+          STI->isThumb() ? ARM::tTAILJMPr
+                         : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4);
         BuildMI(MBB, MBBI, dl,
-                TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
+                TII.get(Opcode))
             .addReg(JumpTarget.getReg(), RegState::Kill);
       }
 
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index bf00ef61c2d..5dc93734ab5 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1332,6 +1332,8 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
   if (AddrReg == 0) return false;
 
   unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
+  assert(isThumb2 || Subtarget->hasV4TOps());
+
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                           TII.get(Opc)).addReg(AddrReg));
 
@@ -2168,9 +2170,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
     RetRegs.push_back(VA.getLocReg());
   }
 
-  unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                                    TII.get(RetOpc));
+                                    TII.get(Subtarget->getReturnOpcode()));
   AddOptionalDefs(MIB);
   for (unsigned R : RetRegs)
     MIB.addReg(R, RegState::Implicit);
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 16b54e8848c..6f380ae2e85 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2397,9 +2397,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(
   BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
       .addCFIIndex(CFIIndex);
 
-  // bx lr - Return from this function.
-  Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
-  BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL));
+  // Return from this function.
+  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
 
   // Restore SR0 and SR1 in case of __morestack() was not called.
   // pop {SR0, SR1}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 7206083a707..c488cd347fe 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -2425,7 +2425,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
   def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst),
                                  4, IIC_Br, [],
                                  (BX GPR:$dst)>, Sched<[WriteBr]>,
-                                 Requires<[IsARM]>;
+                                 Requires<[IsARM, HasV4T]>;
 }
 
 // Secure Monitor Call is a system instruction.
@@ -5589,6 +5589,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
                     (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
                   Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>;
 
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in
+  def TAILJMPr4 : ARMPseudoExpand<(outs), (ins GPR:$dst),
+                    4, IIC_Br, [],
+                    (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
+                  Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>;
+
 // Large immediate handling.
 
 // 32-bit immediate using two piece mod_imms or movw + movt.
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 7a452d4a209..5d57b6803c0 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1909,6 +1909,7 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
 
   for (auto Use : Prev->uses())
     if (Use.isKill()) {
+      assert(STI->hasV4TOps());
       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
           .addReg(Use.getReg(), RegState::Kill)
           .add(predOps(ARMCC::AL))
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e15b17512c9..9d749537dc3 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -729,6 +729,17 @@ public:
 
   /// True if fast-isel is used.
   bool useFastISel() const;
+
+  /// Returns the correct return opcode for the current feature set.
+  /// Use BX if available to allow mixing thumb/arm code, but fall back
+  /// to plain mov pc,lr on ARMv4.
+  unsigned getReturnOpcode() const {
+    if (isThumb())
+      return ARM::tBX_RET;
+    if (hasV4TOps())
+      return ARM::BX_RET;
+    return ARM::MOVPCLR;
+  }
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index b8a8b1f7619..2ab7bfe4410 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -142,9 +142,9 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
 
   if (isThumb) {
     if (ARMArchFeature.empty())
-      ARMArchFeature = "+thumb-mode";
+      ARMArchFeature = "+thumb-mode,+v4t";
     else
-      ARMArchFeature += ",+thumb-mode";
+      ARMArchFeature += ",+thumb-mode,+v4t";
   }
 
   if (TT.isOSNaCl()) {
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index f50916e4b47..82e9b20731e 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE
-; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
+; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE
+; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
 
 define void @test_void_return() {
 ; CHECK-LABEL: name: test_void_return
diff --git a/test/CodeGen/ARM/armv4.ll b/test/CodeGen/ARM/armv4.ll
index 6b213d564bd..a0379caaa4f 100644
--- a/test/CodeGen/ARM/armv4.ll
+++ b/test/CodeGen/ARM/armv4.ll
@@ -5,9 +5,24 @@
 ; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=ARM
 ; RUN: llc < %s -mtriple=armv4t-unknown-eabi | FileCheck %s -check-prefix=THUMB
 
-define i32 @test(i32 %a) nounwind readnone {
+define i32 @test_return(i32 %a) nounwind readnone {
 entry:
+; ARM-LABEL: test_return
 ; ARM: mov pc
+; THUMB-LABEL: test_return
 ; THUMB: bx
   ret i32 %a
 }
+
+@helper = global i32 ()* null, align 4
+
+define i32 @test_indirect() #0 {
+entry:
+; ARM-LABEL: test_indirect
+; ARM: mov pc
+; THUMB-LABEL: test_indirect
+; THUMB: bx
+  %0 = load i32 ()*, i32 ()** @helper, align 4
+  %call = tail call i32 %0()
+  ret i32 %call
+}
diff --git a/test/CodeGen/ARM/debug-segmented-stacks.ll b/test/CodeGen/ARM/debug-segmented-stacks.ll
index 3aa33f75411..6dafcecf0f0 100644
--- a/test/CodeGen/ARM/debug-segmented-stacks.ll
+++ b/test/CodeGen/ARM/debug-segmented-stacks.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -filetype=obj
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}
diff --git a/test/CodeGen/ARM/segmented-stacks-dynamic.ll b/test/CodeGen/ARM/segmented-stacks-dynamic.ll
index 86f8ff8dd90..65d25cad386 100644
--- a/test/CodeGen/ARM/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/ARM/segmented-stacks-dynamic.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
-; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-androideabi -mattr=+v4t -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-androideabi -mattr=+v4t -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
diff --git a/test/CodeGen/ARM/segmented-stacks.ll b/test/CodeGen/ARM/segmented-stacks.ll
index cbb124de11c..4fe84faa17f 100644
--- a/test/CodeGen/ARM/segmented-stacks.ll
+++ b/test/CodeGen/ARM/segmented-stacks.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-androideabi -mattr=+v4t -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t  -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
 
 ; We used to crash with filetype=obj
 ; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj
-- 
2.40.0