return false;
}
+static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::tLSLri:
+ case ARM::tLSRri:
+ case ARM::tLSLrr:
+ case ARM::tLSRrr:
+ case ARM::tSUBrr:
+ case ARM::tADDrr:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ case ARM::tMUL:
+ IsThumb1 = true;
+ LLVM_FALLTHROUGH;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
+ case ARM::ADDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
+ case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri:
+ case ARM::t2LSRri:
+ case ARM::t2LSRrr:
+ case ARM::t2LSLri:
+ case ARM::t2LSLrr:
+ return true;
+ }
+}
+
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set the
return false;
}
+ bool IsThumb1 = false;
+ if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
+ return false;
+
+ // We also want to do this peephole for cases like this: if (a*b == 0),
+ // and optimise away the CMP instruction from the generated code sequence:
+ // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
+ // resulting from the select instruction, but these MOVS instructions for
+ // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
+ // However, if we only have MOVS instructions in between the CMP and the
+ // other instruction (the MULS in this example), then the CPSR is dead so we
+ // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
+ // reordering and then continue the analysis hoping we can eliminate the
+ // CMP. This peephole works on the vregs, so is still in SSA form. As a
+ // consequence, the movs won't redefine/kill the MUL operands which would
+ // make this reordering illegal.
+ if (MI && IsThumb1) {
+ --I;
+ bool CanReorder = true;
+ const bool HasStmts = I != E;
+ for (; I != E; --I) {
+ if (I->getOpcode() != ARM::tMOVi8) {
+ CanReorder = false;
+ break;
+ }
+ }
+ if (HasStmts && CanReorder) {
+ MI = MI->removeFromParent();
+ E = CmpInstr;
+ CmpInstr.getParent()->insert(E, MI);
+ }
+ I = CmpInstr;
+ E = MI;
+ }
+
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for Sub.
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (isPredicated(*MI))
return false;
- bool IsThumb1 = false;
- switch (MI->getOpcode()) {
- default: break;
- case ARM::tLSLri:
- case ARM::tLSRri:
- case ARM::tLSLrr:
- case ARM::tLSRrr:
- case ARM::tSUBrr:
- case ARM::tADDrr:
- case ARM::tADDi3:
- case ARM::tADDi8:
- case ARM::tSUBi3:
- case ARM::tSUBi8:
- IsThumb1 = true;
- LLVM_FALLTHROUGH;
- case ARM::RSBrr:
- case ARM::RSBri:
- case ARM::RSCrr:
- case ARM::RSCri:
- case ARM::ADDrr:
- case ARM::ADDri:
- case ARM::ADCrr:
- case ARM::ADCri:
- case ARM::SUBrr:
- case ARM::SUBri:
- case ARM::SBCrr:
- case ARM::SBCri:
- case ARM::t2RSBri:
- case ARM::t2ADDrr:
- case ARM::t2ADDri:
- case ARM::t2ADCrr:
- case ARM::t2ADCri:
- case ARM::t2SUBrr:
- case ARM::t2SUBri:
- case ARM::t2SBCrr:
- case ARM::t2SBCri:
- case ARM::ANDrr:
- case ARM::ANDri:
- case ARM::t2ANDrr:
- case ARM::t2ANDri:
- case ARM::ORRrr:
- case ARM::ORRri:
- case ARM::t2ORRrr:
- case ARM::t2ORRri:
- case ARM::EORrr:
- case ARM::EORri:
- case ARM::t2EORrr:
- case ARM::t2EORri:
- case ARM::t2LSRri:
- case ARM::t2LSRrr:
- case ARM::t2LSLri:
- case ARM::t2LSLrr: {
- // Scan forward for the use of CPSR
- // When checking against MI: if it's a conditional code that requires
- // checking of the V bit or C bit, then this is not safe to do.
- // It is safe to remove CmpInstr if CPSR is redefined or killed.
- // If we are done with the basic block, we need to check whether CPSR is
- // live-out.
- SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
- OperandsToUpdate;
- bool isSafe = false;
- I = CmpInstr;
- E = CmpInstr.getParent()->end();
- while (!isSafe && ++I != E) {
- const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands();
- !isSafe && IO != EO; ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
- isSafe = true;
- break;
- }
- if (!MO.isReg() || MO.getReg() != ARM::CPSR)
- continue;
- if (MO.isDef()) {
- isSafe = true;
- break;
- }
- // Condition code is after the operand before CPSR except for VSELs.
- ARMCC::CondCodes CC;
- bool IsInstrVSel = true;
- switch (Instr.getOpcode()) {
- default:
- IsInstrVSel = false;
- CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
- break;
- case ARM::VSELEQD:
- case ARM::VSELEQS:
- CC = ARMCC::EQ;
- break;
- case ARM::VSELGTD:
- case ARM::VSELGTS:
- CC = ARMCC::GT;
- break;
- case ARM::VSELGED:
- case ARM::VSELGES:
- CC = ARMCC::GE;
- break;
- case ARM::VSELVSS:
- case ARM::VSELVSD:
- CC = ARMCC::VS;
- break;
- }
+ // Scan forward for the use of CPSR
+ // When checking against MI: if it's a conditional code that requires
+ // checking of the V bit or C bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
+ bool isSafe = false;
+ I = CmpInstr;
+ E = CmpInstr.getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+ isSafe = true;
+ break;
+ }
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // Condition code is after the operand before CPSR except for VSELs.
+ ARMCC::CondCodes CC;
+ bool IsInstrVSel = true;
+ switch (Instr.getOpcode()) {
+ default:
+ IsInstrVSel = false;
+ CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+ break;
+ case ARM::VSELEQD:
+ case ARM::VSELEQS:
+ CC = ARMCC::EQ;
+ break;
+ case ARM::VSELGTD:
+ case ARM::VSELGTS:
+ CC = ARMCC::GT;
+ break;
+ case ARM::VSELGED:
+ case ARM::VSELGES:
+ CC = ARMCC::GE;
+ break;
+ case ARM::VSELVSS:
+ case ARM::VSELVSD:
+ CC = ARMCC::VS;
+ break;
+ }
- if (Sub) {
- ARMCC::CondCodes NewCC = getSwappedCondition(CC);
- if (NewCC == ARMCC::AL)
- return false;
- // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
- // on CMP needs to be updated to be based on SUB.
- // Push the condition code operands to OperandsToUpdate.
- // If it is safe to remove CmpInstr, the condition code of these
- // operands will be modified.
- if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg) {
- // VSel doesn't support condition code update.
- if (IsInstrVSel)
- return false;
- OperandsToUpdate.push_back(
- std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
- }
- } else {
- // No Sub, so this is x = <op> y, z; cmp x, 0.
- switch (CC) {
- case ARMCC::EQ: // Z
- case ARMCC::NE: // Z
- case ARMCC::MI: // N
- case ARMCC::PL: // N
- case ARMCC::AL: // none
- // CPSR can be used multiple times, we should continue.
- break;
- case ARMCC::HS: // C
- case ARMCC::LO: // C
- case ARMCC::VS: // V
- case ARMCC::VC: // V
- case ARMCC::HI: // C Z
- case ARMCC::LS: // C Z
- case ARMCC::GE: // N V
- case ARMCC::LT: // N V
- case ARMCC::GT: // Z N V
- case ARMCC::LE: // Z N V
- // The instruction uses the V bit or C bit which is not safe.
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
+ return false;
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg) {
+ // VSel doesn't support condition code update.
+ if (IsInstrVSel)
return false;
- }
+ OperandsToUpdate.push_back(
+ std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
- }
- }
-
- // If CPSR is not killed nor re-defined, we should check whether it is
- // live-out. If it is live-out, do not optimize.
- if (!isSafe) {
- MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(ARM::CPSR))
+ } else {
+ // No Sub, so this is x = <op> y, z; cmp x, 0.
+ switch (CC) {
+ case ARMCC::EQ: // Z
+ case ARMCC::NE: // Z
+ case ARMCC::MI: // N
+ case ARMCC::PL: // N
+ case ARMCC::AL: // none
+ // CPSR can be used multiple times, we should continue.
+ break;
+ case ARMCC::HS: // C
+ case ARMCC::LO: // C
+ case ARMCC::VS: // V
+ case ARMCC::VC: // V
+ case ARMCC::HI: // C Z
+ case ARMCC::LS: // C Z
+ case ARMCC::GE: // N V
+ case ARMCC::LT: // N V
+ case ARMCC::GT: // Z N V
+ case ARMCC::LE: // Z N V
+ // The instruction uses the V bit or C bit which is not safe.
return false;
+ }
+ }
}
+ }
- // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
- // set CPSR so this is represented as an explicit output)
- if (!IsThumb1) {
- MI->getOperand(5).setReg(ARM::CPSR);
- MI->getOperand(5).setIsDef(true);
- }
- assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
- CmpInstr.eraseFromParent();
-
- // Modify the condition code of operands in OperandsToUpdate.
- // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
- // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
- for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
- OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
- return true;
+ // If CPSR is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr.getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
}
+
+ // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
+ // set CPSR so this is represented as an explicit output)
+ if (!IsThumb1) {
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
}
-
- return false;
+ assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
+ CmpInstr.eraseFromParent();
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+
+ return true;
}
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
--- /dev/null
+# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s
+
+--- |
+ ; ModuleID = '<stdin>'
+ source_filename = "<stdin>"
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumb-none--eabi"
+
+ define i32 @f(i32 %a, i32 %b) {
+ entry:
+ %mul = mul nsw i32 %b, %a
+ %cmp = icmp eq i32 %mul, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+ }
+
+...
+---
+name: f
+# CHECK-LABEL: name: f
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: tgpr }
+ - { id: 1, class: tgpr }
+ - { id: 2, class: tgpr }
+ - { id: 3, class: tgpr }
+ - { id: 4, class: tgpr }
+ - { id: 5, class: tgpr }
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+ - { reg: '%r1', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+
+# CHECK: tMOVi8 1, 14, _
+# CHECK: tMOVi8 0, 14, _
+# CHECK: tMUL %1, %0, 14, _
+# CHECK-NOT: tCMPi8
+body: |
+ bb.0.entry:
+ successors: %bb.1.entry(0x40000000), %bb.2.entry(0x40000000)
+ liveins: %r0, %r1
+
+ %1 = COPY %r1
+ %0 = COPY %r0
+ %2, %cpsr = tMUL %1, %0, 14, _
+ %3, %cpsr = tMOVi8 1, 14, _
+ %4, %cpsr = tMOVi8 0, 14, _
+ tCMPi8 killed %2, 0, 14, _, implicit-def %cpsr
+ tBcc %bb.2.entry, 0, %cpsr
+
+ bb.1.entry:
+ successors: %bb.2.entry(0x80000000)
+
+
+ bb.2.entry:
+ %5 = PHI %4, %bb.1.entry, %3, %bb.0.entry
+ %r0 = COPY %5
+ tBX_RET 14, _, implicit %r0
+
+...
--- /dev/null
+# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s
+
+# Here we check that the peephole cmp rewrite is not triggered, because
+# there is store instruction between the tMUL and tCMP, i.e. there are
+# no constants to reorder.
+
+--- |
+ ; ModuleID = 'cmp2-peephole-thumb.ll'
+ source_filename = "<stdin>"
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumb-none--eabi"
+
+ define i32 @g(i32 %a, i32 %b) {
+ entry:
+ %retval = alloca i32, align 4
+ %mul = alloca i32, align 4
+ %mul1 = mul nsw i32 %a, %b
+ store i32 %mul1, i32* %mul, align 4
+ %0 = load i32, i32* %mul, align 4
+ %cmp = icmp sle i32 %0, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+ if.then: ; preds = %entry
+ store i32 42, i32* %retval, align 4
+ br label %return
+
+ if.end: ; preds = %entry
+ store i32 1, i32* %retval, align 4
+ br label %return
+
+ return: ; preds = %if.end, %if.then
+ %1 = load i32, i32* %retval, align 4
+ ret i32 %1
+ }
+
+...
+---
+name: g
+# CHECK-LABEL: name: g
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: tgpr }
+ - { id: 1, class: tgpr }
+ - { id: 2, class: tgpr }
+ - { id: 3, class: tgpr }
+ - { id: 4, class: tgpr }
+ - { id: 5, class: tgpr }
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+ - { reg: '%r1', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+stack:
+ - { id: 0, name: retval, offset: 0, size: 4, alignment: 4, local-offset: -4 }
+ - { id: 1, name: mul, offset: 0, size: 4, alignment: 4, local-offset: -8 }
+
+# CHECK: tMUL
+# CHECK-NEXT: tSTRspi
+# CHECK-NEXT: tCMPi8
+body: |
+ bb.0.entry:
+ successors: %bb.1.if.then(0x40000000), %bb.2.if.end(0x40000000)
+ liveins: %r0, %r1
+
+ %1 = COPY %r1
+ %0 = COPY %r0
+ %2, %cpsr = tMUL %0, %1, 14, _
+ tSTRspi %2, %stack.1.mul, 0, 14, _ :: (store 4 into %ir.mul)
+ tCMPi8 %2, 0, 14, _, implicit-def %cpsr
+ tBcc %bb.2.if.end, 12, %cpsr
+ tB %bb.1.if.then, 14, _
+
+ bb.1.if.then:
+ successors: %bb.3.return(0x80000000)
+
+ %4, %cpsr = tMOVi8 42, 14, _
+ tSTRspi killed %4, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval)
+ tB %bb.3.return, 14, _
+
+ bb.2.if.end:
+ successors: %bb.3.return(0x80000000)
+
+ %3, %cpsr = tMOVi8 1, 14, _
+ tSTRspi killed %3, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval)
+
+ bb.3.return:
+ %5 = tLDRspi %stack.0.retval, 0, 14, _ :: (dereferenceable load 4 from %ir.retval)
+ %r0 = COPY %5
+ tBX_RET 14, _, implicit %r0
+
+...