const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
+ if (Subtarget->isThumb1Only())
+ EmitAlignment(2);
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
.addReg(0));
return;
}
+ case ARM::tTBB_JT:
+ case ARM::tTBH_JT: {
+
+ bool Is8Bit = MI->getOpcode() == ARM::tTBB_JT;
+ unsigned Base = MI->getOperand(0).getReg();
+ unsigned Idx = MI->getOperand(1).getReg();
+ assert(MI->getOperand(1).isKill() && "We need the index register as scratch!");
+
+ // Multiply up idx if necessary.
+ if (!Is8Bit)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+ .addReg(Idx)
+ .addReg(ARM::CPSR)
+ .addReg(Idx)
+ .addImm(1)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ if (Base == ARM::PC) {
+ // TBB [base, idx] =
+ // ADDS idx, idx, base
+ // LDRB idx, [idx, #4] ; or LDRH if TBH
+ // LSLS idx, #1
+ // ADDS pc, pc, idx
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
+ .addReg(Idx)
+ .addReg(Idx)
+ .addReg(Base)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ unsigned Opc = Is8Bit ? ARM::tLDRBi : ARM::tLDRHi;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+ .addReg(Idx)
+ .addReg(Idx)
+ .addImm(Is8Bit ? 4 : 2)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ } else {
+ // TBB [base, idx] =
+ // LDRB idx, [base, idx] ; or LDRH if TBH
+ // LSLS idx, #1
+ // ADDS pc, pc, idx
+
+ unsigned Opc = Is8Bit ? ARM::tLDRBr : ARM::tLDRHr;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+ .addReg(Idx)
+ .addReg(Base)
+ .addReg(Idx)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ }
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+ .addReg(Idx)
+ .addReg(ARM::CPSR)
+ .addReg(Idx)
+ .addImm(1)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
+ .addReg(ARM::PC)
+ .addReg(ARM::PC)
+ .addReg(Idx)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
case ARM::tBR_JTr:
case ARM::BR_JTr: {
// Lower and emit the instruction itself, then the jump table following it.
CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
cl::desc("The max number of iteration for converge"));
+static cl::opt<bool> SynthesizeThumb1TBB(
+ "arm-synthesize-thumb-1-tbb", cl::Hidden, cl::init(true),
+ cl::desc("Use compressed jump tables in Thumb-1 by synthesizing an "
+ "equivalent to the TBB/TBH instructions"));
+
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
bool isThumb;
bool isThumb1;
bool isThumb2;
+ bool isPositionIndependentOrROPI;
public:
static char ID;
ARMConstantIslands() : MachineFunctionPass(ID) {}
STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
TII = STI->getInstrInfo();
+ isPositionIndependentOrROPI =
+ STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
AFI = MF->getInfo<ARMFunctionInfo>();
isThumb = AFI->isThumbFunction();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
+ bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
// This pass invalidates liveness information when it splits basic blocks.
MF->getRegInfo().invalidateLiveness();
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
- if (isThumb2 && AdjustJumpTableBlocks) {
+ if (GenerateTBB && AdjustJumpTableBlocks) {
scanFunctionJumpTables();
MadeChange |= reorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
MadeChange |= optimizeThumb2Branches();
// Optimize jump tables using TBB / TBH.
- if (isThumb2)
+ if (GenerateTBB)
MadeChange |= optimizeThumb2JumpTables();
// After a while, this might be made debug-only, but it is not expensive.
case ARM::t2BR_JT:
JTOpcode = ARM::JUMPTABLE_INSTS;
break;
+ case ARM::tTBB_JT:
case ARM::t2TBB_JT:
JTOpcode = ARM::JUMPTABLE_TBB;
break;
+ case ARM::tTBH_JT:
case ARM::t2TBH_JT:
JTOpcode = ARM::JUMPTABLE_TBH;
break;
case ARM::CONSTPOOL_ENTRY:
break;
case ARM::JUMPTABLE_TBB:
- return 0;
+ return isThumb1 ? 2 : 0;
case ARM::JUMPTABLE_TBH:
+ return isThumb1 ? 2 : 1;
case ARM::JUMPTABLE_INSTS:
return 1;
case ARM::JUMPTABLE_ADDRS:
void ARMConstantIslands::scanFunctionJumpTables() {
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &I : MBB)
- if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT)
+ if (I.isBranch() &&
+ (I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr))
T2JumpTables.push_back(&I);
}
}
default:
continue; // Ignore other JT branches
case ARM::t2BR_JT:
+ case ARM::tBR_JTr:
T2JumpTables.push_back(&I);
continue; // Does not get an entry in ImmBranches
case ARM::Bcc:
if (RemovableAdd) {
RemovableAdd->eraseFromParent();
- DeadSize += 4;
+ DeadSize += isThumb2 ? 4 : 2;
} else if (BaseReg == EntryReg) {
// The add wasn't removable, but clobbered the base for the TBB. So we can't
// preserve it.
if (!ByteOk && !HalfWordOk)
continue;
+ CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
MachineBasicBlock *MBB = MI->getParent();
if (!MI->getOperand(0).isKill()) // FIXME: needed now?
continue;
- unsigned IdxReg = MI->getOperand(1).getReg();
- bool IdxRegKill = MI->getOperand(1).isKill();
- CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
unsigned DeadSize = 0;
bool CanDeleteLEA = false;
bool BaseRegKill = false;
- bool PreservedBaseReg =
+
+ unsigned IdxReg = ~0U;
+ bool IdxRegKill = true;
+ if (isThumb2) {
+ IdxReg = MI->getOperand(1).getReg();
+ IdxRegKill = MI->getOperand(1).isKill();
+
+ bool PreservedBaseReg =
preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
+ if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
+ continue;
+ } else {
+ // We're in thumb-1 mode, so we must have something like:
+ // %idx = tLSLri %idx, 2
+ // %base = tLEApcrelJT
+ // %t = tLDRr %idx, %base
+ unsigned BaseReg = User.MI->getOperand(0).getReg();
+
+ if (User.MI->getIterator() == User.MI->getParent()->begin())
+ continue;
+ MachineInstr *Shift = User.MI->getPrevNode();
+ if (Shift->getOpcode() != ARM::tLSLri ||
+ Shift->getOperand(3).getImm() != 2 ||
+ !Shift->getOperand(2).isKill())
+ continue;
+ IdxReg = Shift->getOperand(2).getReg();
+ unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();
- if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
- continue;
+ MachineInstr *Load = User.MI->getNextNode();
+ if (Load->getOpcode() != ARM::tLDRr)
+ continue;
+ if (Load->getOperand(1).getReg() != ShiftedIdxReg ||
+ Load->getOperand(2).getReg() != BaseReg ||
+ !Load->getOperand(1).isKill())
+ continue;
+ // If we're in PIC mode, there should be another ADD following.
+ if (isPositionIndependentOrROPI) {
+ MachineInstr *Add = Load->getNextNode();
+ if (Add->getOpcode() != ARM::tADDrr ||
+ Add->getOperand(2).getReg() != Load->getOperand(0).getReg() ||
+ Add->getOperand(3).getReg() != BaseReg ||
+ !Add->getOperand(2).isKill())
+ continue;
+ if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
+ continue;
+
+ Add->eraseFromParent();
+ DeadSize += 2;
+ } else {
+ if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
+ continue;
+ }
+
+
+ // Now safe to delete the load and lsl. The LEA will be removed later.
+ CanDeleteLEA = true;
+ Shift->eraseFromParent();
+ Load->eraseFromParent();
+ DeadSize += 4;
+ }
+
DEBUG(dbgs() << "Shrink JT: " << *MI);
MachineInstr *CPEMI = User.CPEMI;
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ if (!isThumb2)
+ Opc = ByteOk ? ARM::tTBB_JT : ARM::tTBH_JT;
+
MachineBasicBlock::iterator MI_JT = MI;
MachineInstr *NewJTMI =
BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
if (CanDeleteLEA) {
User.MI->eraseFromParent();
- DeadSize += 4;
+ DeadSize += isThumb2 ? 4 : 2;
// The LEA was eliminated, the TBB instruction becomes the only new user
// of the jump table.
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond directly to anything in the source.
- assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
- BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
- .addImm(ARMCC::AL).addReg(0);
+ if (isThumb2)
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
+ .addMBB(BB)
+ .addImm(ARMCC::AL)
+ .addReg(0);
+ else
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
+ .addMBB(BB)
+ .addImm(ARMCC::AL)
+ .addReg(0);
// Update internal data structures to account for the newly inserted MBB.
MF->RenumberBlocks(NewBB);
(ins i32imm:$label, pred:$p),
2, IIC_iALUi, []>, Sched<[WriteALU]>;
+// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
+// and make use of the same compressed jump table format as Thumb-2.
+let Size = 2 in {
+def tTBB_JT : tPseudoInst<(outs),
+ (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
+
+def tTBH_JT : tPseudoInst<(outs),
+ (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
+}
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
; RUN: llc -relocation-model=ropi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
-; RUN: llc -relocation-model=static -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_ABS
-; RUN: llc -relocation-model=ropi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_PC
-; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_PC
+; RUN: llc -relocation-model=static -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
+; RUN: llc -relocation-model=ropi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
+; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
declare void @exit0()
; THUMB2: [[LBB4]]
; THUMB2-NEXT: b exit4
-; THUMB1: lsls r[[R_TAB_INDEX:[0-9]+]], r{{[0-9]+}}, #2
-; THUMB1: adr r[[R_TAB_BASE:[0-9]+]], [[LJTI:\.LJTI[0-9]+_[0-9]+]]
-; THUMB1: ldr r[[R_BB_ADDR:[0-9]+]], [r[[R_TAB_INDEX]], r[[R_TAB_BASE]]]
-; THUMB1_PC: adds r[[R_BB_ADDR]], r[[R_BB_ADDR]], r[[R_TAB_BASE]]
-; THUMB1: mov pc, r[[R_BB_ADDR]]
-; THUMB1: [[LJTI]]
-; THUMB1_ABS: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]+1
-; THUMB1_ABS: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]+1
-; THUMB1_ABS: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]+1
-; THUMB1_ABS: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]+1
-; THUMB1_PC: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
-; THUMB1_PC: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
-; THUMB1_PC: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
-; THUMB1_PC: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
+
+; THUMB1: add r[[x:[0-9]+]], pc
+; THUMB1: ldrb r[[x]], [r[[x]], #4]
+; THUMB1: lsls r[[x]], r[[x]], #1
+; THUMB1: [[LCPI:\.LCPI[0-9]+_[0-9]+]]:
+; THUMB1: add pc, r[[x]]
+; THUMB1: .p2align 2
+; THUMB1: .byte ([[LBB1:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
+; THUMB1: .byte ([[LBB2:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
+; THUMB1: .byte ([[LBB3:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
+; THUMB1: .byte ([[LBB4:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB1: [[LBB1]]
; THUMB1-NEXT: bl exit1
-; THUMB1-NEXT: pop
; THUMB1: [[LBB2]]
; THUMB1-NEXT: bl exit2
-; THUMB1-NEXT: pop
; THUMB1: [[LBB3]]
; THUMB1-NEXT: bl exit3
-; THUMB1-NEXT: pop
; THUMB1: [[LBB4]]
; THUMB1-NEXT: bl exit4
-; THUMB1-NEXT: pop
}
--- /dev/null
+; RUN: llc < %s
+; No FileCheck - testing for crash.
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv5e-none-linux-gnueabi"
+
+%struct.blam = type { [4 x %struct.eggs], [6 x [15 x i16]], [6 x i32], i32, i32, i32, i32, i32, i32, %struct.eggs, [4 x %struct.eggs], [4 x %struct.eggs], [4 x i32], i32, i32, i32, [4 x %struct.eggs], [4 x %struct.eggs], i32, %struct.eggs, i32 }
+%struct.eggs = type { i32, i32 }
+
+define void @spam(%struct.blam* %arg, i32 %arg1) {
+bb:
+ %tmp = getelementptr inbounds %struct.blam, %struct.blam* %arg, i32 undef, i32 2, i32 %arg1
+ switch i32 %arg1, label %bb8 [
+ i32 0, label %bb2
+ i32 1, label %bb3
+ i32 2, label %bb4
+ i32 3, label %bb5
+ i32 4, label %bb6
+ i32 5, label %bb7
+ ]
+
+bb2: ; preds = %bb
+ unreachable
+
+bb3: ; preds = %bb
+ unreachable
+
+bb4: ; preds = %bb
+ unreachable
+
+bb5: ; preds = %bb
+ unreachable
+
+bb6: ; preds = %bb
+ unreachable
+
+bb7: ; preds = %bb
+ unreachable
+
+bb8: ; preds = %bb
+ store i32 1, i32* %tmp, align 4
+ unreachable
+}
--- /dev/null
+; RUN: llc -mtriple=thumbv7m-linux-gnu -o - %s | FileCheck %s --check-prefix=T2
+; RUN: llc -mtriple=thumbv6m-linux-gnu -o - %s | FileCheck %s --check-prefix=T1
+
+declare void @foo(double)
+declare i32 @llvm.arm.space(i32, i32)
+
+define i32 @test_tbh(i1 %tst, i32 %sw, i32 %l) {
+ br label %complex
+
+; T2-LABEL: test_tbh:
+; T2: [[ANCHOR:.LCPI[0-9_]+]]:
+; T2: tbh [pc, r{{[0-9]+}}, lsl #1]
+; T2-NEXT: @ BB#1
+; T2-NEXT: LJTI
+; T2-NEXT: .short (.LBB0_[[x:[0-9]+]]-([[ANCHOR]]+4))/2
+; T2-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2
+; T2-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2
+; T2-NEXT: .short (.LBB0_[[x]]-([[ANCHOR]]+4))/2
+
+; T1-LABEL: test_tbh:
+; T1: lsls [[x:r[0-9]+]], r4, #1
+; T1: add [[x]], pc
+; T1: ldrh [[x]], {{\[}}[[x]], #4]
+; T1: lsls [[x]], [[x]], #1
+; T1: [[ANCHOR:.LCPI[0-9_]+]]:
+; T1: add pc, [[x]]
+; T1-NEXT: @ BB#2
+; T1-NEXT: .p2align 2
+; T1-NEXT: LJTI
+; T1-NEXT: .short (.LBB0_[[x:[0-9]+]]-([[ANCHOR]]+4))/2
+; T1-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2
+; T1-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2
+; T1-NEXT: .short (.LBB0_[[x]]-([[ANCHOR]]+4))/2
+
+complex:
+ call void @foo(double 12345.0)
+ switch i32 %sw, label %second [ i32 0, label %other
+ i32 1, label %third
+ i32 2, label %end
+ i32 3, label %other ]
+
+second:
+ ret i32 43
+third:
+ call i32 @llvm.arm.space(i32 970, i32 undef)
+ ret i32 0
+
+other:
+ call void @bar()
+ unreachable
+
+end:
+ ret i32 42
+}
+
+declare void @bar()
; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 -arm-adjust-jump-tables=0 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6-eabi -mcpu=cortex-m0 -arm-adjust-jump-tables=0 %s -o - | FileCheck %s
; Do not use tbb / tbh if any destination is before the jumptable.
; rdar://7102917
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=static | FileCheck %s --check-prefix=THUMB1
+; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=pic | FileCheck %s --check-prefix=THUMB1
define void @bar(i32 %n.u) {
entry:
; CHECK: .end_data_region
; CHECK-NEXT: .p2align 1
+; THUMB1-LABEL: bar:
+; THUMB1: add pc, r0
+; THUMB1: .data_region jt8
+; THUMB1: .byte (LBB0_3-(LCPI0_0+4))/2
+; THUMB1: .end_data_region
+; THUMB1-NEXT: .p2align 1
+
switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
bb:
tail call void(...) @foo1()
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s --check-prefix=CHECK --check-prefix=T2
+; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=pic | FileCheck %s --check-prefix=CHECK --check-prefix=T1
+; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=static | FileCheck %s --check-prefix=CHECK --check-prefix=T1
; Thumb2 target should reorder the bb's in order to use tbb / tbh.
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
; CHECK-LABEL: main:
; CHECK-NOT: adr {{r[0-9]+}}, LJTI
+; T1: lsls r[[x:[0-9]+]], {{r[0-9]+}}, #1
; CHECK: [[PCREL_ANCHOR:LCPI[0-9]+_[0-9]+]]:
-; CHECK-NEXT: tbb [pc, {{r[0-9]+}}]
+; T2-NEXT: tbb [pc, {{r[0-9]+}}]
+; T1-NEXT: add pc, r[[x]]
; CHECK: LJTI0_0:
; CHECK-NEXT: .data_region jt8