From d8ca88c062f0d5a0c475570797513aec38ff2a82 Mon Sep 17 00:00:00 2001 From: Huihui Zhang Date: Tue, 18 Jun 2019 20:55:09 +0000 Subject: [PATCH] [ARM] Comply with rules on ARMv8-A thumb mode partial deprecation of IT. Summary: When identifying instructions that can be folded into a MOVCC instruction, checking for a predicate operand is not enough; for a thumb2 function with restrict-IT, we also need to check whether the machine instruction is eligible for ARMv8 IT or not. Notes in ARMv8-A Architecture Reference Manual, section "Partial deprecation of IT" https://usermanual.wiki/Pdf/ARM20Architecture20Reference20ManualARMv8.1667877052.pdf "ARMv8-A deprecates some uses of the T32 IT instruction. All uses of IT that apply to instructions other than a single subsequent 16-bit instruction from a restricted set are deprecated, as are explicit references to the PC within that single 16-bit instruction. This permits the non-deprecated forms of IT and subsequent instructions to be treated as a single 32-bit conditional instruction." 
Reviewers: efriedma, lebedev.ri, t.p.northover, jmolloy, aemerson, compnerd, stoklund, ostannard Reviewed By: ostannard Subscribers: ostannard, javed.absar, kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63474 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363739 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 10 ++--- lib/Target/ARM/ARMBaseInstrInfo.h | 11 +++--- test/CodeGen/ARM/2014-08-04-muls-it.ll | 3 +- test/CodeGen/ARM/CGP/arm-cgp-icmps.ll | 2 +- test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll | 2 +- test/CodeGen/ARM/arm-and-tst-peephole.ll | 7 ++-- ...st-and-by-const-from-lshr-in-eqcmp-zero.ll | 38 ++++++++++--------- ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 38 ++++++++++--------- test/CodeGen/Thumb2/v8_deprecate_IT.ll | 32 ++++++++++++++++ 9 files changed, 90 insertions(+), 53 deletions(-) create mode 100644 test/CodeGen/Thumb2/v8_deprecate_IT.ll diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9137e063772..e8c7051a237 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2042,9 +2042,9 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, /// Identify instructions that can be folded into a MOVCC instruction, and /// return the defining instruction. -static MachineInstr *canFoldIntoMOVCC(unsigned Reg, - const MachineRegisterInfo &MRI, - const TargetInstrInfo *TII) { +MachineInstr * +ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) const { if (!TargetRegisterInfo::isVirtualRegister(Reg)) return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) @@ -2052,8 +2052,8 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg, MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) return nullptr; - // MI is folded into the MOVCC by predicating it. 
- if (!MI->isPredicable()) + // Check if MI can be predicated and folded into the MOVCC. + if (!isPredicable(*MI)) return nullptr; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index d1e6f89a264..620a2b6f050 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -398,6 +398,11 @@ private: void expandMEMCPY(MachineBasicBlock::iterator) const; + /// Identify instructions that can be folded into a MOVCC instruction, and + /// return the defining instruction. + MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) const; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. @@ -526,12 +531,6 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg); unsigned getMatchingCondBranchOpcode(unsigned Opc); -/// Determine if MI can be folded into an ARM MOVCC instruction, and return the -/// opcode of the SSA instruction representing the conditional MI. -unsigned canFoldARMInstrIntoMOVCC(unsigned Reg, - MachineInstr *&MI, - const MachineRegisterInfo &MRI); - /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether /// the instruction is encoded with an 'S' bit is determined by the optional /// CPSR def operand. 
diff --git a/test/CodeGen/ARM/2014-08-04-muls-it.ll b/test/CodeGen/ARM/2014-08-04-muls-it.ll index 5ba1347433d..a900c1180b3 100644 --- a/test/CodeGen/ARM/2014-08-04-muls-it.ll +++ b/test/CodeGen/ARM/2014-08-04-muls-it.ll @@ -16,8 +16,9 @@ if.end: ; preds = %if.then, %entry } ; CHECK-LABEL: function +; CHECK: mul r2, r0, r0 ; CHECK: cmp r0, r1 ; CHECK-NOT: mulseq r0, r0, r0 -; CHECK: muleq r0, r0, r0 +; CHECK: moveq r0, r2 ; CHECK: bx lr diff --git a/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll b/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll index 8ff7db51e65..76c9746c355 100644 --- a/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll +++ b/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll @@ -235,7 +235,7 @@ entry: } ; CHECK-COMMON-LABEL: icmp_eq_minus_one -; CHECK-COMMON: cmp r0, #255 +; CHECK-COMMON: cmp {{r[0-9]+}}, #255 define i32 @icmp_eq_minus_one(i8* %ptr) { %load = load i8, i8* %ptr, align 1 %conv = zext i8 %load to i32 diff --git a/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll b/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll index 98794f500d4..15030bd3866 100644 --- a/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll +++ b/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll @@ -54,9 +54,9 @@ entry: ; CHECK-DSP: cmp ; CHECK-DSP: cmp -; CHECK-DSP-IMM: sxth [[ARG:r[0-9]+]], r2 ; CHECK-DSP-IMM: uadd16 [[ADD:r[0-9]+]], ; CHECK-DSP-IMM: sxth.w [[SEXT:r[0-9]+]], [[ADD]] +; CHECK-DSP-IMM: sxth [[ARG:r[0-9]+]], r2 ; CHECK-DSP-IMM: cmp [[SEXT]], [[ARG]] ; CHECK-DSP-IMM-NOT: uxt ; CHECK-DSP-IMM: movs [[ONE:r[0-9]+]], #1 diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 3b1503070cb..9c5f5c7b7de 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -168,10 +168,11 @@ define i32 @test_tst_assessment(i32 %a, i32 %b) { ; ; V8-LABEL: test_tst_assessment: ; V8: @ %bb.0: -; V8-NEXT: and r0, r0, #1 +; V8-NEXT: and r2, r0, #1 +; V8-NEXT: subs r0, r2, #1 ; V8-NEXT: lsls r1, r1, #31 -; V8-NEXT: it ne -; V8-NEXT: subne r0, #1 +; 
V8-NEXT: it eq +; V8-NEXT: moveq r0, r2 ; V8-NEXT: bx lr %and1 = and i32 %a, 1 %sub = sub i32 %and1, 1 diff --git a/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll index 6c30586a959..973ae4e676b 100644 --- a/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -440,24 +440,25 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; ; THUMB8-LABEL: scalar_i64_signbit_eq: ; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: sub.w lr, r2, #32 +; THUMB8-NEXT: .save {r4, lr} +; THUMB8-NEXT: push {r4, lr} +; THUMB8-NEXT: rsb.w r4, r2, #32 +; THUMB8-NEXT: sub.w r3, r2, #32 ; THUMB8-NEXT: mov.w r12, #-2147483648 -; THUMB8-NEXT: cmp.w lr, #0 -; THUMB8-NEXT: lsl.w r3, r12, r3 +; THUMB8-NEXT: cmp r3, #0 +; THUMB8-NEXT: lsl.w r4, r12, r4 ; THUMB8-NEXT: lsr.w r2, r12, r2 +; THUMB8-NEXT: lsr.w lr, r12, r3 ; THUMB8-NEXT: it ge -; THUMB8-NEXT: lsrge.w r3, r12, lr +; THUMB8-NEXT: movge r4, lr ; THUMB8-NEXT: it ge ; THUMB8-NEXT: movge r2, #0 -; THUMB8-NEXT: ands r0, r3 +; THUMB8-NEXT: ands r0, r4 ; THUMB8-NEXT: ands r1, r2 ; THUMB8-NEXT: orrs r0, r1 ; THUMB8-NEXT: clz r0, r0 ; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB8-NEXT: pop {r4, pc} %t0 = lshr i64 9223372036854775808, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -615,28 +616,29 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; ; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq: ; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} +; THUMB8-NEXT: .save {r4, lr} +; THUMB8-NEXT: push {r4, lr} ; THUMB8-NEXT: movs r3, #0 ; THUMB8-NEXT: movw lr, #65535 ; THUMB8-NEXT: movt r3, #65535 ; THUMB8-NEXT: lsr.w r12, r3, r2 ; THUMB8-NEXT: rsb.w r3, r2, #32 ; THUMB8-NEXT: lsl.w r3, lr, r3 -; THUMB8-NEXT: orr.w r3, r3, r12 -; 
THUMB8-NEXT: sub.w r12, r2, #32 -; THUMB8-NEXT: cmp.w r12, #0 +; THUMB8-NEXT: orr.w r12, r12, r3 +; THUMB8-NEXT: sub.w r3, r2, #32 ; THUMB8-NEXT: lsr.w r2, lr, r2 -; THUMB8-NEXT: it ge -; THUMB8-NEXT: lsrge.w r3, lr, r12 +; THUMB8-NEXT: cmp r3, #0 +; THUMB8-NEXT: lsr.w r4, lr, r3 +; THUMB8-NEXT: it lt +; THUMB8-NEXT: movlt r4, r12 ; THUMB8-NEXT: it ge ; THUMB8-NEXT: movge r2, #0 -; THUMB8-NEXT: ands r0, r3 +; THUMB8-NEXT: ands r0, r4 ; THUMB8-NEXT: ands r1, r2 ; THUMB8-NEXT: orrs r0, r1 ; THUMB8-NEXT: clz r0, r0 ; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB8-NEXT: pop {r4, pc} %t0 = lshr i64 281474976645120, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 diff --git a/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll index 82572af4d68..d00df6d0f97 100644 --- a/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -518,24 +518,25 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; ; THUMB8-LABEL: scalar_i64_lowestbit_eq: ; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: sub.w lr, r2, #32 +; THUMB8-NEXT: .save {r4, lr} +; THUMB8-NEXT: push {r4, lr} +; THUMB8-NEXT: rsb.w r4, r2, #32 +; THUMB8-NEXT: sub.w r3, r2, #32 ; THUMB8-NEXT: mov.w r12, #1 -; THUMB8-NEXT: cmp.w lr, #0 -; THUMB8-NEXT: lsr.w r3, r12, r3 +; THUMB8-NEXT: cmp r3, #0 +; THUMB8-NEXT: lsr.w r4, r12, r4 ; THUMB8-NEXT: lsl.w r2, r12, r2 +; THUMB8-NEXT: lsl.w lr, r12, r3 ; THUMB8-NEXT: it ge -; THUMB8-NEXT: lslge.w r3, r12, lr +; THUMB8-NEXT: movge r4, lr ; THUMB8-NEXT: it ge ; THUMB8-NEXT: movge r2, #0 -; THUMB8-NEXT: ands r1, r3 +; THUMB8-NEXT: ands r1, r4 ; THUMB8-NEXT: ands r0, r2 ; THUMB8-NEXT: orrs r0, r1 ; THUMB8-NEXT: clz r0, r0 ; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB8-NEXT: pop {r4, pc} %t0 = shl 
i64 1, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -635,28 +636,29 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; ; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq: ; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} +; THUMB8-NEXT: .save {r4, lr} +; THUMB8-NEXT: push {r4, lr} ; THUMB8-NEXT: movw r3, #65535 ; THUMB8-NEXT: movw lr, #0 ; THUMB8-NEXT: lsl.w r12, r3, r2 ; THUMB8-NEXT: rsb.w r3, r2, #32 ; THUMB8-NEXT: movt lr, #65535 ; THUMB8-NEXT: lsr.w r3, lr, r3 -; THUMB8-NEXT: orr.w r3, r3, r12 -; THUMB8-NEXT: sub.w r12, r2, #32 -; THUMB8-NEXT: cmp.w r12, #0 +; THUMB8-NEXT: orr.w r12, r12, r3 +; THUMB8-NEXT: sub.w r3, r2, #32 ; THUMB8-NEXT: lsl.w r2, lr, r2 -; THUMB8-NEXT: it ge -; THUMB8-NEXT: lslge.w r3, lr, r12 +; THUMB8-NEXT: cmp r3, #0 +; THUMB8-NEXT: lsl.w r4, lr, r3 +; THUMB8-NEXT: it lt +; THUMB8-NEXT: movlt r4, r12 ; THUMB8-NEXT: it ge ; THUMB8-NEXT: movge r2, #0 -; THUMB8-NEXT: ands r1, r3 +; THUMB8-NEXT: ands r1, r4 ; THUMB8-NEXT: ands r0, r2 ; THUMB8-NEXT: orrs r0, r1 ; THUMB8-NEXT: clz r0, r0 ; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB8-NEXT: pop {r4, pc} %t0 = shl i64 281474976645120, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 diff --git a/test/CodeGen/Thumb2/v8_deprecate_IT.ll b/test/CodeGen/Thumb2/v8_deprecate_IT.ll new file mode 100644 index 00000000000..b9bbce13c95 --- /dev/null +++ b/test/CodeGen/Thumb2/v8_deprecate_IT.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=thumbv7 -o - | llvm-mc -triple thumbv7 --show-encoding 2>&1 | FileCheck %s --check-prefix=V7 +; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it -o - | llvm-mc -triple thumbv7 --show-encoding 2>&1 | FileCheck %s --check-prefix=V7_RESTRICT_IT +; RUN: llc < %s -mtriple=thumbv8 -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8 +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s 
--check-prefix=V8_NO_RESTRICT_IT + + +; V7-NOT: warning +; V7_RESTRICT_IT-NOT: warning +; V8-NOT: warning +; V8_NO_RESTRICT_IT: warning: deprecated instruction in IT block +; it ge @ encoding: [0xa8,0xbf] +; lslge.w r3, r12, lr @ encoding: [0x0c,0xfa,0x0e,0xf3] ; deprecated in ARMv8 thumb mode +define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) { +%t0 = shl i64 1, %y +%t1 = and i64 %t0, %x +%res = icmp eq i64 %t1, 0 +ret i1 %res +} + +; V7-NOT: warning +; V7_RESTRICT_IT-NOT: warning +; V8-NOT: warning +; V8_NO_RESTRICT_IT: warning: deprecated instruction in IT block +; it ne @ encoding: [0x18,0xbf] +; movne.w r0, #-1 @ encoding: [0x4f,0xf0,0xff,0x30] ; deprecated in ARMv8 thumb mode +define i32 @icmp_eq_minus_one(i8* %ptr) { + %load = load i8, i8* %ptr, align 1 + %conv = zext i8 %load to i32 + %cmp = icmp eq i8 %load, -1 + %ret = select i1 %cmp, i32 %conv, i32 -1 + ret i32 %ret +} -- 2.50.1