From 728d43b28f1412c8b6bee7e7ef6cec79b872078f Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 16 Oct 2017 01:16:35 +0000 Subject: [PATCH] [globalisel][tblgen] Add support for iPTR and implement am_unscaled* and am_indexed* Summary: iPTR is a pointer of subtarget-specific size to any address space. Therefore type checks on this size derive the SizeInBits from a subtarget hook. At this point, we can import the simplest G_LOAD rules and select load instructions using them. Further patches will add support for the predicates to enable additional loads as well as the stores. Depends on D37457 Reviewers: ab, qcolombet, t.p.northover, rovka, aditya_nandakumar Reviewed By: qcolombet Subscribers: kristof.beyls, javed.absar, llvm-commits, igorb Differential Revision: https://reviews.llvm.org/D37458 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315885 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../CodeGen/GlobalISel/InstructionSelector.h | 6 + .../GlobalISel/InstructionSelectorImpl.h | 10 ++ .../GlobalISel/InstructionSelector.cpp | 18 +++ lib/Target/AArch64/AArch64InstrFormats.td | 33 +++++ .../AArch64/AArch64InstructionSelector.cpp | 125 ++++++++++++++++++ .../AArch64/GlobalISel/select-load.mir | 8 +- utils/TableGen/GlobalISelEmitter.cpp | 16 ++- 7 files changed, 205 insertions(+), 11 deletions(-) diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 9bc126ed726..1c11207c78f 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -341,6 +341,12 @@ protected: bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; + /// Return true if the specified operand is a G_GEP with a G_CONSTANT on the + /// right-hand side. GlobalISel's separation of pointer and integer types + /// means that we don't need to worry about G_OR with equivalent semantics. 
+ bool isBaseWithConstantOffset(const MachineOperand &Root, + const MachineRegisterInfo &MRI) const; + bool isObviouslySafeToFold(MachineInstr &MI) const; }; diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index 7fb413fceac..c0fc1eaf56b 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -248,10 +248,20 @@ bool InstructionSelector::executeMatchTable( int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; int64_t SizeInBits = MatchTable[CurrentIdx++]; + DEBUG(dbgs() << CurrentIdx << ": GIM_CheckPointerToAny(MIs[" << InsnID << "]->getOperand(" << OpIdx << "), SizeInBits=" << SizeInBits << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + // iPTR must be looked up in the target. + if (SizeInBits == 0) { + MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent(); + SizeInBits = MF->getDataLayout().getPointerSizeInBits(0); + } + + assert(SizeInBits != 0 && "Pointer size must be known"); + const LLT &Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) { if (handleReject() == RejectAndGiveUp) diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index e4ee2bb7cf4..2a563c9bf5c 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -98,6 +99,23 @@ bool InstructionSelector::isOperandImmEqual( return false; } +bool InstructionSelector::isBaseWithConstantOffset( + const 
MachineOperand &Root, const MachineRegisterInfo &MRI) const { + if (!Root.isReg()) + return false; + + MachineInstr *RootI = MRI.getVRegDef(Root.getReg()); + if (RootI->getOpcode() != TargetOpcode::G_GEP) + return false; + + MachineOperand &RHS = RootI->getOperand(2); + MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg()); + if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT) + return false; + + return true; +} + bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const { return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() && MI.implicit_operands().begin() == MI.implicit_operands().end(); diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 572e018da30..1946d5a14dc 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -2516,6 +2516,22 @@ def am_indexed32 : ComplexPattern; def am_indexed64 : ComplexPattern; def am_indexed128 : ComplexPattern; +def gi_am_indexed8 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed16 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed32 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed64 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed128 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; + class UImm12OffsetOperand : AsmOperandClass { let Name = "UImm12Offset" # Scale; let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">"; @@ -3146,6 +3162,23 @@ def am_unscaled32 : ComplexPattern; def am_unscaled64 : ComplexPattern; def am_unscaled128 :ComplexPattern; +def gi_am_unscaled8 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled16 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled128 : + GIComplexOperandMatcher, + 
GIComplexPatternEquiv; + + class BaseLoadStoreUnscale sz, bit V, bits<2> opc, dag oops, dag iops, string asm, list pattern> : I { diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index ca93d1feaa6..622bf995147 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -66,6 +66,32 @@ private: ComplexRendererFn selectArithImmed(MachineOperand &Root) const; + ComplexRendererFn selectAddrModeUnscaled(MachineOperand &Root, + unsigned Size) const; + + ComplexRendererFn selectAddrModeUnscaled8(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 1); + } + ComplexRendererFn selectAddrModeUnscaled16(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 2); + } + ComplexRendererFn selectAddrModeUnscaled32(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 4); + } + ComplexRendererFn selectAddrModeUnscaled64(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 8); + } + ComplexRendererFn selectAddrModeUnscaled128(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 16); + } + + ComplexRendererFn selectAddrModeIndexed(MachineOperand &Root, + unsigned Size) const; + template + ComplexRendererFn selectAddrModeIndexed(MachineOperand &Root) const { + return selectAddrModeIndexed(Root, Width / 8); + } + const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; @@ -1392,6 +1418,105 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { }}; } +/// Select a "register plus unscaled signed 9-bit immediate" address. This +/// should only match when there is an offset that is not valid for a scaled +/// immediate addressing mode. The "Size" argument is the size in bytes of the +/// memory reference, which is needed here to know what is valid for a scaled +/// immediate. 
+InstructionSelector::ComplexRendererFn +AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, + unsigned Size) const { + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + if (!Root.isReg()) + return None; + + if (!isBaseWithConstantOffset(Root, MRI)) + return None; + + MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); + if (!RootDef) + return None; + + MachineOperand &OffImm = RootDef->getOperand(2); + if (!OffImm.isReg()) + return None; + MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); + if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) + return None; + int64_t RHSC; + MachineOperand &RHSOp1 = RHS->getOperand(1); + if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) + return None; + RHSC = RHSOp1.getCImm()->getSExtValue(); + + // If the offset is valid as a scaled immediate, don't match here. + if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) + return None; + if (RHSC >= -256 && RHSC < 256) { + MachineOperand &Base = RootDef->getOperand(1); + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, + }}; + } + return None; +} + +/// Select a "register plus scaled unsigned 12-bit immediate" address. The +/// "Size" argument is the size in bytes of the memory reference, which +/// determines the scale. 
+InstructionSelector::ComplexRendererFn +AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, + unsigned Size) const { + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + if (!Root.isReg()) + return None; + + MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); + if (!RootDef) + return None; + + if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, + }}; + } + + if (isBaseWithConstantOffset(Root, MRI)) { + MachineOperand &LHS = RootDef->getOperand(1); + MachineOperand &RHS = RootDef->getOperand(2); + MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); + MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); + if (LHSDef && RHSDef) { + int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); + unsigned Scale = Log2_32(Size); + if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { + if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) + LHSDef = MRI.getVRegDef(LHSDef->getOperand(1).getReg()); + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, + }}; + } + } + } + + // Before falling back to our general case, check if the unscaled + // instructions can handle this. If so, that's preferable. 
+ if (selectAddrModeUnscaled(Root, Size).hasValue()) + return None; + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, + }}; +} + namespace llvm { InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &TM, diff --git a/test/CodeGen/AArch64/GlobalISel/select-load.mir b/test/CodeGen/AArch64/GlobalISel/select-load.mir index 7e0c9d6ebc0..2955788a71e 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-load.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-load.mir @@ -1,9 +1,5 @@ # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -# This patch temporarily causes LD1Onev1d to match instead of LDRDui on a -# couple functions. A patch to support iPTR will follow that fixes this. -# XFAIL: * - --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -533,13 +529,13 @@ registers: # CHECK: body: # CHECK: %0 = COPY %x0 -# CHECK: %1 = LD1Onev2s %0 +# CHECK: %1 = LDRDui %0, 0 :: (load 8 from %ir.addr) # CHECK: %d0 = COPY %1 body: | bb.0: liveins: %x0 %0(p0) = COPY %x0 - %1(<2 x s32>) = G_LOAD %0 :: (load 4 from %ir.addr) + %1(<2 x s32>) = G_LOAD %0 :: (load 8 from %ir.addr) %d0 = COPY %1(<2 x s32>) ... diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp index 3bfaa887301..ee3b459a07d 100644 --- a/utils/TableGen/GlobalISelEmitter.cpp +++ b/utils/TableGen/GlobalISelEmitter.cpp @@ -775,8 +775,8 @@ std::set LLTOperandMatcher::KnownTypes; /// no reliable means to derive the missing type information from the pattern so /// imported rules must test the components of a pointer separately. /// -/// SizeInBits must be non-zero and the matched pointer must be that size. -/// TODO: Add support for iPTR via SizeInBits==0 and a subtarget query. +/// If SizeInBits is zero, then the pointer size will be obtained from the +/// subtarget. 
class PointerToAnyOperandMatcher : public OperandPredicateMatcher { protected: unsigned SizeInBits; @@ -979,9 +979,15 @@ public: Error addTypeCheckPredicate(const TypeSetByHwMode &VTy, bool OperandIsAPointer) { - auto OpTyOrNone = VTy.isMachineValueType() - ? MVTToLLT(VTy.getMachineValueType().SimpleTy) - : None; + if (!VTy.isMachineValueType()) + return failedImport("unsupported typeset"); + + if (VTy.getMachineValueType() == MVT::iPTR && OperandIsAPointer) { + addPredicate(0); + return Error::success(); + } + + auto OpTyOrNone = MVTToLLT(VTy.getMachineValueType().SimpleTy); if (!OpTyOrNone) return failedImport("unsupported type"); -- 2.50.1