[AArch64][GlobalISel] Use fcsel instead of csel for G_SELECT on FPRs

author Jessica Paquette <jpaquette@apple.com>

Fri, 3 May 2019 22:37:46 +0000 (22:37 +0000)

committer Jessica Paquette <jpaquette@apple.com>

Fri, 3 May 2019 22:37:46 +0000 (22:37 +0000)
author Jessica Paquette <jpaquette@apple.com>
Fri, 3 May 2019 22:37:46 +0000 (22:37 +0000)
committer Jessica Paquette <jpaquette@apple.com>
Fri, 3 May 2019 22:37:46 +0000 (22:37 +0000)
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp

index 76bbd9489afc950e0f8822be3dc4f17e6cf8fe76..bb878ef2f5c308fdd95e92e843895764a295ab98 100644 (file)
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -521,6 +521,36 @@ static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
    return true;
  }
  
+/// Helper function to get the source and destination register classes for a
+/// copy. Returns a std::pair containing the source register class for the
+/// copy, and the destination register class for the copy. If a register class
+/// cannot be determined, then it will be nullptr.
+static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
+getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
+                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+                     const RegisterBankInfo &RBI) {
+  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned SrcReg = I.getOperand(1).getReg();
+  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+  // Special casing for cross-bank copies of s1s. We can technically represent
+  // a 1-bit value with any size of register. The minimum size for a GPR is 32
+  // bits. So, we need to put the FPR on 32 bits as well.
+  //
+  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
+  // then we can pull it into the helpers that get the appropriate class for a
+  // register bank. Or make a new helper that carries along some constraint
+  // information.
+  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+    SrcSize = DstSize = 32;
+
+  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
+          getMinClassForRegBank(DstRegBank, DstSize, true)};
+}
+
  static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                         MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                         const RegisterBankInfo &RBI) {
@@ -529,8 +559,12 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
    unsigned SrcReg = I.getOperand(1).getReg();
    const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
    const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
-  const TargetRegisterClass *DstRC = getMinClassForRegBank(
-      DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
+
+  // Find the correct register classes for the source and destination registers.
+  const TargetRegisterClass *SrcRC;
+  const TargetRegisterClass *DstRC;
+  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
+
    if (!DstRC) {
      LLVM_DEBUG(dbgs() << "Unexpected dest size "
                        << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
@@ -563,8 +597,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
    // a SUBREG_TO_REG.
    if (I.isCopy()) {
      // Yes. Check if there's anything to fix up.
-    const TargetRegisterClass *SrcRC = getMinClassForRegBank(
-        SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
      if (!SrcRC) {
        LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
        return false;
@@ -1724,12 +1756,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
      const unsigned TReg = I.getOperand(2).getReg();
      const unsigned FReg = I.getOperand(3).getReg();
  
+    // If we have a floating-point result, then we should use a floating point
+    // select instead of an integer select.
+    bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
+                 AArch64::GPRRegBankID);
      unsigned CSelOpc = 0;
  
      if (Ty == LLT::scalar(32)) {
-      CSelOpc = AArch64::CSELWr;
+      CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
      } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
-      CSelOpc = AArch64::CSELXr;
+      CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
      } else {
        return false;
      }
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp

index dd6f57d02f009242e201b2c3e046ba7d452decf2..7fdcde44e514a5d455e150ab8128fd0c9cd53ff0 100644 (file)
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -476,6 +476,24 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
    const TargetSubtargetInfo &STI = MF.getSubtarget();
    const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  
+  // Helper lambda that returns true if MI has floating point constraints.
+  auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
+    unsigned Op = MI.getOpcode();
+
+    // Do we have an explicit floating point instruction?
+    if (isPreISelGenericFloatingPointOpcode(Op))
+      return true;
+
+    // No. Check if we have a copy-like instruction. If we do, then we could
+    // still be fed by floating point instructions.
+    if (Op != TargetOpcode::COPY && !MI.isPHI())
+      return false;
+
+    // MI is copy-like. Return true if it's using an FPR.
+    return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
+           &AArch64::FPRRegBank;
+  };
+
    switch (Opc) {
      // G_{F|S|U}REM are not listed because they are not legal.
      // Arithmetic ops.
@@ -657,30 +675,27 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
        break;
      }
      break;
+  case TargetOpcode::G_SELECT: {
+    // If the destination is FPR, preserve that.
+    if (OpRegBankIdx[0] != PMI_FirstGPR)
+      break;
+    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+    if (SrcTy.isVector() ||
+        any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+               [&](MachineInstr &MI) { return HasFPConstraints(MI); })) {
+      // Set the register bank of every operand to FPR.
+      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
+           Idx < NumOperands; ++Idx)
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+    }
+    break;
+  }
    case TargetOpcode::G_UNMERGE_VALUES: {
      // If the first operand belongs to a FPR register bank, then make sure that
      // we preserve that.
      if (OpRegBankIdx[0] != PMI_FirstGPR)
        break;
  
-    // Helper lambda that returns true if MI has floating point constraints.
-    auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
-      unsigned Op = MI.getOpcode();
-
-      // Do we have an explicit floating point instruction?
-      if (isPreISelGenericFloatingPointOpcode(Op))
-        return true;
-
-      // No. Check if we have a copy-like instruction. If we do, then we could
-      // still be fed by floating point instructions.
-      if (Op != TargetOpcode::COPY && !MI.isPHI())
-        return false;
-
-      // MI is copy-like. Return true if it's using an FPR.
-      return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
-             &AArch64::FPRRegBank;
-    };
-
      LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
      // UNMERGE into scalars from a vector should always use FPR.
      // Likewise if any of the uses are FP instructions.
diff --git a/test/CodeGen/AArch64/GlobalISel/regbank-select.mir b/test/CodeGen/AArch64/GlobalISel/regbank-select.mir

new file mode 100644 (file)

index 0000000..97b5434
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
@@ -0,0 +1,60 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            select_f32
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $s0, $s1, $w0
+
+    ; CHECK-LABEL: name: select_f32
+    ; CHECK: liveins: $s0, $s1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s32) = COPY $s0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr(s32) = COPY $s1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $s0 = COPY [[SELECT]](s32)
+    ; CHECK: RET_ReallyLR implicit $s0
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s32) = COPY $s0
+    %2:_(s32) = COPY $s1
+    %4:_(s32) = G_SELECT %0(s1), %1, %2
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            select_f64
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $w0
+
+    ; CHECK-LABEL: name: select_f64
+    ; CHECK: liveins: $d0, $d1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $d0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $d1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
diff --git a/test/CodeGen/AArch64/GlobalISel/select-select.mir b/test/CodeGen/AArch64/GlobalISel/select-select.mir

new file mode 100644 (file)

index 0000000..e3acf29
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            select_f32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $s0, $s1, $w0
+
+    ; CHECK-LABEL: name: select_f32
+    ; CHECK: liveins: $s0, $s1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
+    ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %3:gpr(s32) = COPY $w0
+    %0:gpr(s1) = G_TRUNC %3(s32)
+    %1:fpr(s32) = COPY $s0
+    %2:fpr(s32) = COPY $s1
+    %5:fpr(s1) = COPY %0(s1)
+    %4:fpr(s32) = G_SELECT %5(s1), %1, %2
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            select_f64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $w0
+
+    ; CHECK-LABEL: name: select_f64
+    ; CHECK: liveins: $d0, $d1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
+    ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %3:gpr(s32) = COPY $w0
+    %0:gpr(s1) = G_TRUNC %3(s32)
+    %1:fpr(s64) = COPY $d0
+    %2:fpr(s64) = COPY $d1
+    %5:fpr(s1) = COPY %0(s1)
+    %4:fpr(s64) = G_SELECT %5(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
diff --git a/test/CodeGen/AArch64/fast-isel-select.ll b/test/CodeGen/AArch64/fast-isel-select.ll

index e06f74cb7fefe62b50984f4fc67fc7baf00e7b31..30ad4b811233b7ca4c387f877dc1c73510cc0a24 100644 (file)
--- a/test/CodeGen/AArch64/fast-isel-select.ll
+++ b/test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,5 +1,6 @@
  ; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
  ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
  
  ; First test the different supported value types for select.
  define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -46,6 +47,9 @@ define float @select_f32(i1 zeroext %c, float %a, float %b) {
  ; CHECK-LABEL: select_f32
  ; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
  ; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
+; GISEL-LABEL: select_f32
+; GISEL:       {{cmp w0, #0|tst w0, #0x1}}
+; GISEL-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
    %1 = select i1 %c, float %a, float %b
    ret float %1
  }
@@ -54,6 +58,9 @@ define double @select_f64(i1 zeroext %c, double %a, double %b) {
  ; CHECK-LABEL: select_f64
  ; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
  ; CHECK-NEXT:  fcsel {{d[0-9]+}}, d0, d1, ne
+; GISEL-LABEL: select_f64
+; GISEL:       {{cmp w0, #0|tst w0, #0x1}}
+; GISEL-NEXT:  fcsel {{d[0-9]+}}, d0, d1, ne
    %1 = select i1 %c, double %a, double %b
    ret double %1
  }
author	Jessica Paquette <jpaquette@apple.com>
	Fri, 3 May 2019 22:37:46 +0000 (22:37 +0000)
committer	Jessica Paquette <jpaquette@apple.com>
	Fri, 3 May 2019 22:37:46 +0000 (22:37 +0000)
lib/Target/AArch64/AArch64InstructionSelector.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64RegisterBankInfo.cpp		patch \| blob \| history
test/CodeGen/AArch64/GlobalISel/regbank-select.mir	[new file with mode: 0644]	patch \| blob
test/CodeGen/AArch64/GlobalISel/select-select.mir	[new file with mode: 0644]	patch \| blob
test/CodeGen/AArch64/fast-isel-select.ll		patch \| blob \| history