From: Jessica Paquette Date: Mon, 8 Jul 2019 22:58:36 +0000 (+0000) Subject: [AArch64][GlobalISel] Use TST for comparisons when possible X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=430cb4eceff433d4955447a24d1b36443f908574;p=llvm [AArch64][GlobalISel] Use TST for comparisons when possible Porting over the part of `emitComparison` in AArch64ISelLowering where we use TST to represent a compare. - Rename `tryOptCMN` to `tryFoldIntegerCompare`, since it now also emits TSTs when possible. - Add a utility function for emitting a TST with register operands. - Rename opt-fold-cmn.mir to opt-fold-compare.mir, since it now also tests the TST fold as well. Differential Revision: https://reviews.llvm.org/D64371 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365404 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 46d6ccb7c2a..bef690c8361 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -130,6 +130,8 @@ private: MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitTST(const Register &LHS, const Register &RHS, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitExtractVectorElt(Optional DstReg, const RegisterBank &DstRB, LLT ScalarTy, Register VecReg, unsigned LaneIdx, @@ -202,9 +204,9 @@ private: bool tryOptVectorShuffle(MachineInstr &I) const; bool tryOptVectorDup(MachineInstr &MI) const; bool tryOptSelect(MachineInstr &MI) const; - MachineInstr *tryOptCMN(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, - MachineIRBuilder &MIRBuilder) const; + MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, + MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const; const AArch64TargetMachine &TM; const AArch64Subtarget &STI; @@ -801,6 +803,19 @@ static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) { return CmpOpcTbl[ShouldUseImm][OpSize == 64]; } +/// Returns true if \p P is an unsigned integer comparison predicate. +static bool isUnsignedICMPPred(const CmpInst::Predicate P) { + switch (P) { + default: + return false; + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_ULE: + return true; + } +} + static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { switch (P) { default: @@ -2919,16 +2934,45 @@ AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, return &*CmpMI; } +MachineInstr * +AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS, + MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + unsigned RegSize = MRI.getType(LHS).getSizeInBits(); + bool Is32Bit = (RegSize == 32); + static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri}, + {AArch64::ANDSWrr, AArch64::ANDSWri}}; + Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; + + // We might be able to fold in an immediate into the TST. We need to make sure + // it's a logical immediate though, since ANDS requires that. 
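+  // (A "logical immediate" is a bitmask-encodable constant: for example, in
+  // the tests below 3 is encodable and selects ANDSWri, while -1 is not and
+  // keeps the register form ANDSWrr.)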
+ auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); + bool IsImmForm = ValAndVReg.hasValue() && + AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize); + unsigned Opc = OpcTable[Is32Bit][IsImmForm]; + auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS}); + + if (IsImmForm) + TstMI.addImm( + AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize)); + else + TstMI.addUse(RHS); + + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + return &*TstMI; +} + MachineInstr *AArch64InstructionSelector::emitIntegerCompare( MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - // Fold the compare into a CMN if possible. - MachineInstr *Cmn = tryOptCMN(LHS, RHS, Predicate, MIRBuilder); - if (Cmn) - return Cmn; + // Fold the compare if possible. + MachineInstr *FoldCmp = + tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder); + if (FoldCmp) + return FoldCmp; // Can't fold into a CMN. Just emit a normal compare. unsigned CmpOpc = 0; @@ -3170,10 +3214,9 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { return true; } -MachineInstr * -AArch64InstructionSelector::tryOptCMN(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, - MachineIRBuilder &MIRBuilder) const { +MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( + MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && "Unexpected MachineOperand"); MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); @@ -3228,42 +3271,52 @@ AArch64InstructionSelector::tryOptCMN(MachineOperand &LHS, MachineOperand &RHS, // Check if the RHS or LHS of the G_ICMP is defined by a SUB MachineInstr *LHSDef = FindDef(LHS.getReg()); MachineInstr *RHSDef = FindDef(RHS.getReg()); - const AArch64CC::CondCode CC = - changeICMPPredToAArch64CC((CmpInst::Predicate)Predicate.getPredicate()); - bool DidFold = false; - - MachineOperand CMNLHS = LHS; - MachineOperand CMNRHS = RHS; - if (IsCMN(LHSDef, CC)) { - // We're doing this: - // - // Given: - // - // x = G_SUB 0, y - // G_ICMP x, z - // - // Update the G_ICMP: - // - // G_ICMP y, z - CMNLHS = LHSDef->getOperand(2); - DidFold = true; - } else if (IsCMN(RHSDef, CC)) { - // Same idea here, but with the RHS of the compare instead: - // - // Given: - // - // x = G_SUB 0, y - // G_ICMP z, x - // - // Update the G_ICMP: - // - // G_ICMP z, y - CMNRHS = RHSDef->getOperand(2); - DidFold = true; + CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); + const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P); + + // Given this: + // + // x = G_SUB 0, y + // G_ICMP x, z + // + // Produce this: + // + // cmn y, z + if (IsCMN(LHSDef, CC)) + return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); + + // Same idea here, but with the RHS of the compare instead: + // + // Given this: + // + // x = G_SUB 0, y + // G_ICMP z, x + // + // Produce this: + // + // cmn z, y + if (IsCMN(RHSDef, CC)) + return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); + + // Given this: + // + // z = G_AND x, y + // G_ICMP z, 0 + // + // Produce this if the compare is signed: + // + // tst x, y + if (!isUnsignedICMPPred(P) && LHSDef && + LHSDef->getOpcode() == TargetOpcode::G_AND) { + // Make sure that the RHS is 0. 
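+    // ANDS only describes how (x & y) relates to zero, so the fold is only
+    // valid when the G_ICMP's RHS is the constant 0 (see the no_tst_nonzero
+    // test below).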
+ auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI); + if (!ValAndVReg || ValAndVReg->Value != 0) + return nullptr; + + return emitTST(LHSDef->getOperand(1).getReg(), + LHSDef->getOperand(2).getReg(), MIRBuilder); } - if (DidFold) - return emitCMN(CMNLHS, CMNRHS, MIRBuilder); return nullptr; } diff --git a/test/CodeGen/AArch64/GlobalISel/opt-fold-cmn.mir b/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir similarity index 58% rename from test/CodeGen/AArch64/GlobalISel/opt-fold-cmn.mir rename to test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir index adbffe239ff..b78a2cb2719 100644 --- a/test/CodeGen/AArch64/GlobalISel/opt-fold-cmn.mir +++ b/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir @@ -1,15 +1,32 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s # -# Verify that we can fold G_SUB into G_ICMP when we have a pattern like this: +# Verify folding operations into G_ICMP. +# +# E.g cmn/adds folding: # # x = G_SUB 0, y # G_ICMP intpred(something_safe) z, x # +# Folds to: +# adds z, y +# # Where "something_safe" is ne or eq. # +# ands/tst folding: +# +# z = G_AND x, y +# G_ICMP z, 0 +# +# Folds to: +# +# tst x, y +# +# When we have signed comparisons. +# # Tests whose names start with cmn_ should use ADDS for the G_ICMP. Tests whose -# names start with no_cmn should use SUBS. +# names start with no_cmn should use SUBS. Similarly, tests whose names start +# with TST should use ANDS for the G_ICMP. # ... @@ -273,3 +290,191 @@ body: | RET_ReallyLR implicit $x0 ... +--- +name: tst_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: tst_s32 + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0 + ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: $wzr = ANDSWrr [[MOVi32imm]], [[COPY]], implicit-def $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 0, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %6:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %2, %1 + %8:gpr(s32) = G_CONSTANT i32 0 + %7:gpr(s32) = G_ICMP intpred(eq), %3(s32), %8 + %4:gpr(s1) = G_TRUNC %7(s32) + %5:gpr(s32) = G_SELECT %4(s1), %6, %2 + $w0 = COPY %5(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: tst_s64 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: tst_s64 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 0 + ; CHECK: [[MOVi64imm1:%[0-9]+]]:gpr64 = MOVi64imm 1 + ; CHECK: $xzr = ANDSXrr [[MOVi64imm]], [[COPY]], implicit-def $nzcv + ; CHECK: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[MOVi64imm1]], [[MOVi64imm]], 0, implicit $nzcv + ; CHECK: $x0 = COPY [[CSELXr]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = G_CONSTANT i64 0 + %6:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %2, %1 + %8:gpr(s64) = G_CONSTANT i64 0 + %7:gpr(s32) = G_ICMP intpred(eq), %3(s64), %8 + %4:gpr(s1) = G_TRUNC %7(s32) + %5:gpr(s64) = G_SELECT %4(s1), %6, %2 + $x0 = COPY %5(s64) + RET_ReallyLR implicit $x0 + +... 
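+# An unsigned predicate (ugt below) reads flags that ANDS does not set the way
+# a subtraction would, so the compare must stay as SUBS instead of folding to
+# TST.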
+--- +name: no_tst_unsigned_compare +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: no_tst_unsigned_compare + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0 + ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32common = ANDWrr [[MOVi32imm]], [[COPY]] + ; CHECK: $wzr = SUBSWri [[ANDWrr]], 0, 0, implicit-def $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %6:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %2, %1 + %8:gpr(s32) = G_CONSTANT i32 0 + %7:gpr(s32) = G_ICMP intpred(ugt), %3(s32), %8 + %4:gpr(s1) = G_TRUNC %7(s32) + %5:gpr(s32) = G_SELECT %4(s1), %6, %2 + $w0 = COPY %5(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: no_tst_nonzero +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: no_tst_nonzero + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0 + ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32common = ANDWrr [[MOVi32imm]], [[COPY]] + ; CHECK: $wzr = SUBSWri [[ANDWrr]], 42, 0, implicit-def $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %6:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %2, %1 + %8:gpr(s32) = G_CONSTANT i32 42 + %7:gpr(s32) = G_ICMP intpred(ugt), %3(s32), %8 + %4:gpr(s1) = G_TRUNC %7(s32) + %5:gpr(s32) = G_SELECT %4(s1), %6, %2 + $w0 = COPY %5(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: imm_tst +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: imm_tst + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: $wzr = ANDSWri [[COPY]], 1, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: $w0 = COPY [[CSINCWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %3:gpr(s32) = G_CONSTANT i32 1 + + ; This can be represented as a logical immediate, so we can pull it into + ; the ANDS. We should get ANDSWri. + %4:gpr(s32) = G_CONSTANT i32 3 + + %5:gpr(s32) = G_AND %1, %4 + %6:gpr(s32) = G_ICMP intpred(eq), %5(s32), %2 + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + + +... 
+--- +name: no_imm_tst_not_logical_imm +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: no_imm_tst_not_logical_imm + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm -1 + ; CHECK: $wzr = ANDSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: $w0 = COPY [[CSINCWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 0 + %3:gpr(s32) = G_CONSTANT i32 1 + + ; This immediate can't be represented as a logical immediate. We shouldn't + ; select ANDSWri. + %4:gpr(s32) = G_CONSTANT i32 -1 + + %5:gpr(s32) = G_AND %1, %4 + %6:gpr(s32) = G_ICMP intpred(eq), %5(s32), %2 + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0