From: Jessica Paquette Date: Mon, 8 Jul 2019 22:58:36 +0000 (+0000) Subject: [AArch64][GlobalISel] Use TST for comparisons when possible X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=430cb4eceff433d4955447a24d1b36443f908574;p=llvm [AArch64][GlobalISel] Use TST for comparisons when possible Porting over the part of `emitComparison` in AArch64ISelLowering where we use TST to represent a compare. - Rename `tryOptCMN` to `tryFoldIntegerCompare`, since it now also emits TSTs when possible. - Add a utility function for emitting a TST with register operands. - Rename opt-fold-cmn.mir to opt-fold-compare.mir, since it now also tests the TST fold as well. Differential Revision: https://reviews.llvm.org/D64371 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365404 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 46d6ccb7c2a..bef690c8361 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -130,6 +130,8 @@ private: MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitTST(const Register &LHS, const Register &RHS, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitExtractVectorElt(Optional DstReg, const RegisterBank &DstRB, LLT ScalarTy, Register VecReg, unsigned LaneIdx, @@ -202,9 +204,9 @@ private: bool tryOptVectorShuffle(MachineInstr &I) const; bool tryOptVectorDup(MachineInstr &MI) const; bool tryOptSelect(MachineInstr &MI) const; - MachineInstr *tryOptCMN(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, - MachineIRBuilder &MIRBuilder) const; + MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, + MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const; const AArch64TargetMachine &TM; const AArch64Subtarget &STI; @@ -801,6 +803,19 @@ static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) { return CmpOpcTbl[ShouldUseImm][OpSize == 64]; } +/// Returns true if \p P is an unsigned integer comparison predicate. +static bool isUnsignedICMPPred(const CmpInst::Predicate P) { + switch (P) { + default: + return false; + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_ULE: + return true; + } +} + static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { switch (P) { default: @@ -2919,16 +2934,45 @@ AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, return &*CmpMI; } +MachineInstr * +AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS, + MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + unsigned RegSize = MRI.getType(LHS).getSizeInBits(); + bool Is32Bit = (RegSize == 32); + static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri}, + {AArch64::ANDSWrr, AArch64::ANDSWri}}; + Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; + + // We might be able to fold in an immediate into the TST. We need to make sure + // it's a logical immediate though, since ANDS requires that. 
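+  // (A "logical immediate" is a bitmask-encodable constant: for example, in
+  // the tests below 3 is encodable and selects ANDSWri, while -1 is not and
+  // keeps the register form ANDSWrr.)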
+ auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); + bool IsImmForm = ValAndVReg.hasValue() && + AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize); + unsigned Opc = OpcTable[Is32Bit][IsImmForm]; + auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS}); + + if (IsImmForm) + TstMI.addImm( + AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize)); + else + TstMI.addUse(RHS); + + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + return &*TstMI; +} + MachineInstr *AArch64InstructionSelector::emitIntegerCompare( MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - // Fold the compare into a CMN if possible. - MachineInstr *Cmn = tryOptCMN(LHS, RHS, Predicate, MIRBuilder); - if (Cmn) - return Cmn; + // Fold the compare if possible. + MachineInstr *FoldCmp = + tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder); + if (FoldCmp) + return FoldCmp; // Can't fold into a CMN. Just emit a normal compare. unsigned CmpOpc = 0; @@ -3170,10 +3214,9 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { return true; } -MachineInstr * -AArch64InstructionSelector::tryOptCMN(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, - MachineIRBuilder &MIRBuilder) const { +MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( + MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && "Unexpected MachineOperand"); MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); @@ -3228,42 +3271,52 @@ AArch64InstructionSelector::tryOptCMN(MachineOperand &LHS, MachineOperand &RHS, // Check if the RHS or LHS of the G_ICMP is defined by a SUB MachineInstr *LHSDef = FindDef(LHS.getReg()); MachineInstr *RHSDef = FindDef(RHS.getReg()); - const AArch64CC::CondCode CC = - changeICMPPredToAArch64CC((CmpInst::Predicate)Predicate.getPredicate()); - bool DidFold = false; - - MachineOperand CMNLHS = LHS; - MachineOperand CMNRHS = RHS; - if (IsCMN(LHSDef, CC)) { - // We're doing this: - // - // Given: - // - // x = G_SUB 0, y - // G_ICMP x, z - // - // Update the G_ICMP: - // - // G_ICMP y, z - CMNLHS = LHSDef->getOperand(2); - DidFold = true; - } else if (IsCMN(RHSDef, CC)) { - // Same idea here, but with the RHS of the compare instead: - // - // Given: - // - // x = G_SUB 0, y - // G_ICMP z, x - // - // Update the G_ICMP: - // - // G_ICMP z, y - CMNRHS = RHSDef->getOperand(2); - DidFold = true; + CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); + const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P); + + // Given this: + // + // x = G_SUB 0, y + // G_ICMP x, z + // + // Produce this: + // + // cmn y, z + if (IsCMN(LHSDef, CC)) + return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); + + // Same idea here, but with the RHS of the compare instead: + // + // Given this: + // + // x = G_SUB 0, y + // G_ICMP z, x + // + // Produce this: + // + // cmn z, y + if (IsCMN(RHSDef, CC)) + return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); + + // Given this: + // + // z = G_AND x, y + // G_ICMP z, 0 + // + // Produce this if the compare is signed: + // + // tst x, y + if (!isUnsignedICMPPred(P) && LHSDef && + LHSDef->getOpcode() == TargetOpcode::G_AND) { + // Make sure that the RHS is 0. 
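+    // ANDS only describes how (x & y) relates to zero, so the fold is only
+    // valid when the G_ICMP's RHS is the constant 0 (see the no_tst_nonzero
+    // test below).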
+ auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI); + if (!ValAndVReg || ValAndVReg->Value != 0) + return nullptr; + + return emitTST(LHSDef->getOperand(1).getReg(), + LHSDef->getOperand(2).getReg(), MIRBuilder); } - if (DidFold) - return emitCMN(CMNLHS, CMNRHS, MIRBuilder); return nullptr; } diff --git a/test/CodeGen/AArch64/GlobalISel/opt-fold-cmn.mir b/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir similarity index 58% rename from test/CodeGen/AArch64/GlobalISel/opt-fold-cmn.mir rename to test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir index adbffe239ff..b78a2cb2719 100644 --- a/test/CodeGen/AArch64/GlobalISel/opt-fold-cmn.mir +++ b/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir @@ -1,15 +1,32 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s # -# Verify that we can fold G_SUB into G_ICMP when we have a pattern like this: +# Verify folding operations into G_ICMP. +# +# E.g cmn/adds folding: # # x = G_SUB 0, y # G_ICMP intpred(something_safe) z, x # +# Folds to: +# adds z, y +# # Where "something_safe" is ne or eq. # +# ands/tst folding: +# +# z = G_AND x, y +# G_ICMP z, 0 +# +# Folds to: +# +# tst x, y +# +# When we have signed comparisons. +# # Tests whose names start with cmn_ should use ADDS for the G_ICMP. Tests whose -# names start with no_cmn should use SUBS. +# names start with no_cmn should use SUBS. Similarly, tests whose names start +# with TST should use ANDS for the G_ICMP. # ... @@ -273,3 +290,191 @@ body: | RET_ReallyLR implicit $x0 ... +--- +name: tst_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: tst_s32 + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0 + ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: $wzr = ANDSWrr [[MOVi32imm]], [[COPY]], implicit-def $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 0, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %6:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %2, %1 + %8:gpr(s32) = G_CONSTANT i32 0 + %7:gpr(s32) = G_ICMP intpred(eq), %3(s32), %8 + %4:gpr(s1) = G_TRUNC %7(s32) + %5:gpr(s32) = G_SELECT %4(s1), %6, %2 + $w0 = COPY %5(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: tst_s64 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: tst_s64 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 0 + ; CHECK: [[MOVi64imm1:%[0-9]+]]:gpr64 = MOVi64imm 1 + ; CHECK: $xzr = ANDSXrr [[MOVi64imm]], [[COPY]], implicit-def $nzcv + ; CHECK: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[MOVi64imm1]], [[MOVi64imm]], 0, implicit $nzcv + ; CHECK: $x0 = COPY [[CSELXr]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = G_CONSTANT i64 0 + %6:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %2, %1 + %8:gpr(s64) = G_CONSTANT i64 0 + %7:gpr(s32) = G_ICMP intpred(eq), %3(s64), %8 + %4:gpr(s1) = G_TRUNC %7(s32) + %5:gpr(s64) = G_SELECT %4(s1), %6, %2 + $x0 = COPY %5(s64) + RET_ReallyLR implicit $x0 + +... 
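+# An unsigned predicate (ugt below) reads flags that ANDS does not set the way
+# a subtraction would, so the compare must stay as SUBS instead of folding to
+# TST.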
+--- +name: no_tst_unsigned_compare +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: no_tst_unsigned_compare + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0 + ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32common = ANDWrr [[MOVi32imm]], [[COPY]] + ; CHECK: $wzr = SUBSWri [[ANDWrr]], 0, 0, implicit-def $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %6:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %2, %1 + %8:gpr(s32) = G_CONSTANT i32 0 + %7:gpr(s32) = G_ICMP intpred(ugt), %3(s32), %8 + %4:gpr(s1) = G_TRUNC %7(s32) + %5:gpr(s32) = G_SELECT %4(s1), %6, %2 + $w0 = COPY %5(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: no_tst_nonzero +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: no_tst_nonzero + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0 + ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32common = ANDWrr [[MOVi32imm]], [[COPY]] + ; CHECK: $wzr = SUBSWri [[ANDWrr]], 42, 0, implicit-def $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %6:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %2, %1 + %8:gpr(s32) = G_CONSTANT i32 42 + %7:gpr(s32) = G_ICMP intpred(ugt), %3(s32), %8 + %4:gpr(s1) = G_TRUNC %7(s32) + %5:gpr(s32) = G_SELECT %4(s1), %6, %2 + $w0 = COPY %5(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: imm_tst +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: imm_tst + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: $wzr = ANDSWri [[COPY]], 1, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: $w0 = COPY [[CSINCWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %3:gpr(s32) = G_CONSTANT i32 1 + + ; This can be represented as a logical immediate, so we can pull it into + ; the ANDS. We should get ANDSWri. + %4:gpr(s32) = G_CONSTANT i32 3 + + %5:gpr(s32) = G_AND %1, %4 + %6:gpr(s32) = G_ICMP intpred(eq), %5(s32), %2 + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + + +... 
+--- +name: no_imm_tst_not_logical_imm +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: no_imm_tst_not_logical_imm + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm -1 + ; CHECK: $wzr = ANDSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: $w0 = COPY [[CSINCWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %3:gpr(s32) = G_CONSTANT i32 1 + + ; This immediate can't be represented as a logical immediate. We shouldn't + ; select ANDSWri. + %4:gpr(s32) = G_CONSTANT i32 -1 + + %5:gpr(s32) = G_AND %1, %4 + %6:gpr(s32) = G_ICMP intpred(eq), %5(s32), %2 + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0