return true;
}
+/// Helper function to get the source and destination register classes for a
+/// copy. Returns a std::pair containing the source register class for the
+/// copy, and the destination register class for the copy. If a register class
+/// cannot be determined, then it will be nullptr.
+static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
+getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned SrcReg = I.getOperand(1).getReg();
+ const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+ unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+ unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+ // Special casing for cross-bank copies of s1s. We can technically represent
+ // a 1-bit value with any size of register. The minimum size for a GPR is 32
+ // bits. So, we need to put the FPR on 32 bits as well.
+ //
+ // FIXME: I'm not sure if this case holds true outside of copies. If it does,
+ // then we can pull it into the helpers that get the appropriate class for a
+ // register bank. Or make a new helper that carries along some constraint
+ // information.
+ if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+ SrcSize = DstSize = 32;
+
+ return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
+ getMinClassForRegBank(DstRegBank, DstSize, true)};
+}
+
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
unsigned SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
- const TargetRegisterClass *DstRC = getMinClassForRegBank(
- DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
+
+ // Find the correct register classes for the source and destination registers.
+ const TargetRegisterClass *SrcRC;
+ const TargetRegisterClass *DstRC;
+ std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
+
if (!DstRC) {
LLVM_DEBUG(dbgs() << "Unexpected dest size "
<< RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
// a SUBREG_TO_REG.
if (I.isCopy()) {
// Yes. Check if there's anything to fix up.
- const TargetRegisterClass *SrcRC = getMinClassForRegBank(
- SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
if (!SrcRC) {
LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
return false;
const unsigned TReg = I.getOperand(2).getReg();
const unsigned FReg = I.getOperand(3).getReg();
+ // If we have a floating-point result, then we should use a floating point
+ // select instead of an integer select.
+ bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
+ AArch64::GPRRegBankID);
unsigned CSelOpc = 0;
if (Ty == LLT::scalar(32)) {
- CSelOpc = AArch64::CSELWr;
+ CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
- CSelOpc = AArch64::CSELXr;
+ CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
} else {
return false;
}
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ // Helper lambda that returns true if MI has floating point constraints.
+ auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
+ unsigned Op = MI.getOpcode();
+
+ // Do we have an explicit floating point instruction?
+ if (isPreISelGenericFloatingPointOpcode(Op))
+ return true;
+
+ // No. Check if we have a copy-like instruction. If we do, then we could
+ // still be fed by floating point instructions.
+ if (Op != TargetOpcode::COPY && !MI.isPHI())
+ return false;
+
+ // MI is copy-like. Return true if it's using an FPR.
+ return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
+ &AArch64::FPRRegBank;
+ };
+
switch (Opc) {
// G_{F|S|U}REM are not listed because they are not legal.
// Arithmetic ops.
break;
}
break;
+ case TargetOpcode::G_SELECT: {
+ // If the destination is FPR, preserve that.
+ if (OpRegBankIdx[0] != PMI_FirstGPR)
+ break;
+ LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+ if (SrcTy.isVector() ||
+ any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+ [&](MachineInstr &MI) { return HasFPConstraints(MI); })) {
+ // Set the register bank of every operand to FPR.
+ for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
+ Idx < NumOperands; ++Idx)
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ }
+ break;
+ }
case TargetOpcode::G_UNMERGE_VALUES: {
// If the first operand belongs to a FPR register bank, then make sure that
// we preserve that.
if (OpRegBankIdx[0] != PMI_FirstGPR)
break;
- // Helper lambda that returns true if MI has floating point constraints.
- auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
- unsigned Op = MI.getOpcode();
-
- // Do we have an explicit floating point instruction?
- if (isPreISelGenericFloatingPointOpcode(Op))
- return true;
-
- // No. Check if we have a copy-like instruction. If we do, then we could
- // still be fed by floating point instructions.
- if (Op != TargetOpcode::COPY && !MI.isPHI())
- return false;
-
- // MI is copy-like. Return true if it's using an FPR.
- return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
- &AArch64::FPRRegBank;
- };
-
LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
// UNMERGE into scalars from a vector should always use FPR.
// Likewise if any of the uses are FP instructions.
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: select_f32
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $s0, $s1, $w0
+
+ ; CHECK-LABEL: name: select_f32
+ ; CHECK: liveins: $s0, $s1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr(s32) = COPY $s0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr(s32) = COPY $s1
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+ ; CHECK: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+ ; CHECK: $s0 = COPY [[SELECT]](s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ %3:_(s32) = COPY $w0
+ %0:_(s1) = G_TRUNC %3(s32)
+ %1:_(s32) = COPY $s0
+ %2:_(s32) = COPY $s1
+ %4:_(s32) = G_SELECT %0(s1), %1, %2
+ $s0 = COPY %4(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: select_f64
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $d1, $w0
+
+ ; CHECK-LABEL: name: select_f64
+ ; CHECK: liveins: $d0, $d1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+ ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+ ; CHECK: $d0 = COPY [[SELECT]](s64)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %3:_(s32) = COPY $w0
+ %0:_(s1) = G_TRUNC %3(s32)
+ %1:_(s64) = COPY $d0
+ %2:_(s64) = COPY $d1
+ %4:_(s64) = G_SELECT %0(s1), %1, %2
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: select_f32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $s0, $s1, $w0
+
+ ; CHECK-LABEL: name: select_f32
+ ; CHECK: liveins: $s0, $s1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
+ ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
+ ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
+ ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
+ ; CHECK: $s0 = COPY [[FCSELSrrr]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ %3:gpr(s32) = COPY $w0
+ %0:gpr(s1) = G_TRUNC %3(s32)
+ %1:fpr(s32) = COPY $s0
+ %2:fpr(s32) = COPY $s1
+ %5:fpr(s1) = COPY %0(s1)
+ %4:fpr(s32) = G_SELECT %5(s1), %1, %2
+ $s0 = COPY %4(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: select_f64
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $d1, $w0
+
+ ; CHECK-LABEL: name: select_f64
+ ; CHECK: liveins: $d0, $d1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
+ ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
+ ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
+ ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
+ ; CHECK: $d0 = COPY [[FCSELDrrr]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %3:gpr(s32) = COPY $w0
+ %0:gpr(s1) = G_TRUNC %3(s32)
+ %1:fpr(s64) = COPY $d0
+ %2:fpr(s64) = COPY $d1
+ %5:fpr(s1) = COPY %0(s1)
+ %4:fpr(s64) = G_SELECT %5(s1), %1, %2
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
; First test the different supported value types for select.
define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
; CHECK-LABEL: select_f32
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
+; GISEL-LABEL: select_f32
+; GISEL: {{cmp w0, #0|tst w0, #0x1}}
+; GISEL-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
%1 = select i1 %c, float %a, float %b
ret float %1
}
; CHECK-LABEL: select_f64
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne
+; GISEL-LABEL: select_f64
+; GISEL: {{cmp w0, #0|tst w0, #0x1}}
+; GISEL-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne
%1 = select i1 %c, double %a, double %b
ret double %1
}