From d5a79b972759e8a0bc0b8214738b8beb976e90f6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jun 2019 23:54:58 +0000
Subject: [PATCH] AMDGPU: Consolidate some getGeneration checks

This is incomplete, and ideally these would all be removed, but it's
better to localize them to the subtarget first with comments about
what they're for.
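
To make the pattern concrete, here is a minimal before/after sketch
(illustration only, not itself part of the diff), using the DS-offset
check from AMDGPUISelDAGToDAG.cpp:

    // Before: the call site encodes a raw generation comparison, which
    // says nothing about why the generation matters.
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
        Subtarget->unsafeDSOffsetFoldingEnabled())
      return true;

    // After: a named subtarget predicate, with a comment at its
    // definition in AMDGPUSubtarget.h, records the hardware property
    // actually being tested.
    if (Subtarget->hasUsableDSOffset() ||
        Subtarget->unsafeDSOffsetFoldingEnabled())
      return true;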

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363902 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp      |  9 ++-
 lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp     |  5 +-
 .../AMDGPU/AMDGPULowerKernelArguments.cpp     |  2 +-
 lib/Target/AMDGPU/AMDGPUSubtarget.h           | 57 +++++++++++++++++++
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp      |  2 +-
 lib/Target/AMDGPU/GCNHazardRecognizer.cpp     |  9 ++-
 lib/Target/AMDGPU/SIISelLowering.cpp          | 15 ++---
 lib/Target/AMDGPU/SIInsertWaitcnts.cpp        |  2 +-
 lib/Target/AMDGPU/SIInstrInfo.cpp             | 12 ++--
 9 files changed, 82 insertions(+), 31 deletions(-)

diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index a99f62124cf..3c8538cb03e 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1100,7 +1100,7 @@ bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
       (OffsetBits == 8 && !isUInt<8>(Offset)))
     return false;
 
-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
+  if (Subtarget->hasUsableDSOffset() ||
       Subtarget->unsafeDSOffsetFoldingEnabled())
     return true;
 
@@ -1363,7 +1363,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
   SDValue Ptr, Offen, Idxen, Addr64;
 
   // addr64 bit was removed for volcanic islands.
-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (!Subtarget->hasAddr64())
     return false;
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
@@ -2415,9 +2415,8 @@ SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
 }
 
 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
-  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    return false;
-  }
+  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
+
   const SIRegisterInfo *SIRI =
       static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
   const SIInstrInfo * SII =
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index f93b61035ff..a3fce5ce671 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -443,7 +443,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
           unsigned MemSize = Query.MMODescrs[0].SizeInBits;
           return (MemSize == 96) &&
                  Query.Types[0].isVector() &&
-                 ST.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS;
+                 !ST.hasDwordx3LoadStores();
         },
         [=](const LegalityQuery &Query) {
           return std::make_pair(0, V2S32);
@@ -471,8 +471,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
             return true;
 
           case 96:
-            // XXX hasLoadX3
-            return (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS);
+            return ST.hasDwordx3LoadStores();
 
           case 256:
           case 512:
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index b1abe1001e9..5dd5b3691e0 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -111,7 +111,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
       // integer types.
      if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
           PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
-          ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
+          !ST.hasUsableDSOffset())
        continue;
 
      // FIXME: We can replace this with equivalent alias.scope/noalias
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 178fc5be393..3b1ed618677 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -484,6 +484,12 @@ public:
     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
   }
 
+  // Return true if the target only has the reverse operand versions of VALU
+  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
+  bool hasOnlyRevVALUShifts() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
   bool hasBFE() const {
     return true;
   }
@@ -536,10 +542,48 @@ public:
     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
   }
 
+  /// True if the offset field of DS instructions works as expected. On SI, the
+  /// offset uses a 16-bit adder and does not always wrap properly.
+  bool hasUsableDSOffset() const {
+    return getGeneration() >= SEA_ISLANDS;
+  }
+
   bool unsafeDSOffsetFoldingEnabled() const {
     return EnableUnsafeDSOffsetFolding;
   }
 
+  /// Condition output from div_scale is usable.
+  bool hasUsableDivScaleConditionOutput() const {
+    return getGeneration() != SOUTHERN_ISLANDS;
+  }
+
+  /// Extra wait hazard is needed in some cases before
+  /// s_cbranch_vccnz/s_cbranch_vccz.
+  bool hasReadVCCZBug() const {
+    return getGeneration() <= SEA_ISLANDS;
+  }
+
+  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
+  /// was written by a VALU instruction.
+  bool hasSMRDReadVALUDefHazard() const {
+    return getGeneration() == SOUTHERN_ISLANDS;
+  }
+
+  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
+  /// SGPR was written by a VALU instruction.
+  bool hasVMEMReadSGPRVALUDefHazard() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
+  bool hasRFEHazards() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
+  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
+  unsigned getSetRegWaitStates() const {
+    return getGeneration() <= SEA_ISLANDS ? 1 : 2;
+  }
+
   bool dumpCode() const {
     return DumpCode;
   }
@@ -571,6 +615,11 @@ public:
     return CIInsts && EnableDS128;
   }
 
+  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
+  bool haveRoundOpsF64() const {
+    return CIInsts;
+  }
+
   /// \returns If MUBUF instructions always perform range checking, even for
   /// buffer resources used for private memory access.
   bool privateMemoryResourceIsRangeChecked() const {
@@ -620,6 +669,10 @@ public:
     return FlatAddressSpace;
   }
 
+  bool hasFlatScrRegister() const {
+    return hasFlatAddressSpace();
+  }
+
   bool hasFlatInstOffsets() const {
     return FlatInstOffsets;
   }
@@ -652,6 +705,10 @@ public:
     return hasD16LoadStore() && !isSRAMECCEnabled();
   }
 
+  bool hasD16Images() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
   /// Return if most LDS instructions have an m0 use that requires m0 to be
   /// initialized.
   bool ldsRequiresM0Init() const {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 37c8de93be5..1c17e6054f0 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -401,7 +401,7 @@ int GCNTTIImpl::getArithmeticInstrCost(
     if (SLT == MVT::f64) {
       int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
       // Add cost of workaround.
-      if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      if (!ST->hasUsableDivScaleConditionOutput())
         Cost += 3 * getFullRateInstrCost();
 
       return LT.first * Cost * NElts;
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index f19fa684629..1d5ff3c4e7b 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -522,7 +522,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
   WaitStatesNeeded = checkSoftClauseHazards(SMRD);
 
   // This SMRD hazard only affects SI.
-  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
+  if (!ST.hasSMRDReadVALUDefHazard())
     return WaitStatesNeeded;
 
   // A read of an SGPR by SMRD instruction requires 4 wait states when the
@@ -561,7 +561,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
 }
 
 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
-  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (!ST.hasVMEMReadSGPRVALUDefHazard())
     return 0;
 
   int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
@@ -640,8 +640,7 @@ int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
   const SIInstrInfo *TII = ST.getInstrInfo();
   unsigned HWReg = getHWReg(TII, *SetRegInstr);
 
-  const int SetRegWaitStates =
-      ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
+  const int SetRegWaitStates = ST.getSetRegWaitStates();
   auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
     return HWReg == getHWReg(TII, *MI);
   };
@@ -787,7 +786,7 @@ int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
 }
 
 int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
-  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (!ST.hasRFEHazards())
     return 0;
 
   const SIInstrInfo *TII = ST.getInstrInfo();
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 54cc459e148..a159f208aca 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -423,7 +423,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
 
-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+  if (Subtarget->haveRoundOpsF64()) {
     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
     setOperationAction(ISD::FCEIL, MVT::f64, Legal);
     setOperationAction(ISD::FRINT, MVT::f64, Legal);
@@ -2865,8 +2865,7 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
 
   }
 
-  if ((Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||
-       Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) &&
+  if (!Subtarget->hasFlatScrRegister() &&
       Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
     report_fatal_error(Twine("invalid register \""
                              + StringRef(RegName) + "\" for subtarget."));
@@ -4979,8 +4978,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
 
     MVT StoreVT = VData.getSimpleValueType();
     if (StoreVT.getScalarType() == MVT::f16) {
-      if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
-          !BaseOpcode->HasD16)
+      if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
         return Op; // D16 is unsupported for this instruction
 
       IsD16 = true;
@@ -4993,8 +4991,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   // and whether packing is supported.
   MVT LoadVT = ResultTypes[0].getSimpleVT();
   if (LoadVT.getScalarType() == MVT::f16) {
-    if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
-        !BaseOpcode->HasD16)
+    if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
       return Op; // D16 is unsupported for this instruction
 
     IsD16 = true;
@@ -7262,7 +7259,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
 
   SDValue Scale;
 
-  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+  if (!Subtarget->hasUsableDivScaleConditionOutput()) {
     // Workaround a hardware bug on SI where the condition output from div_scale
     // is not usable.
 
@@ -7382,7 +7379,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
     // out-of-bounds even if base + offsets is in bounds. Split vectorized
     // stores here to avoid emitting ds_write2_b32. We may re-combine the
    // store later in the SILoadStoreOptimizer.
-    if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+    if (!Subtarget->hasUsableDSOffset() &&
         NumElements == 2 && VT.getStoreSize() == 8 &&
         Store->getAlignment() < 8) {
       return SplitVectorStore(Op, DAG);
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 29c891c72af..36570a5d6d5 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1042,7 +1042,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
   // TODO: Remove this work-around, enable the assert for Bug 457939
  //       after fixing the scheduler. Also, the Shader Compiler code is
  //       independent of target.
-  if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
+  if (readsVCCZ(MI) && ST->hasReadVCCZBug()) {
     if (ScoreBrackets.getScoreLB(LGKM_CNT) <
             ScoreBrackets.getScoreUB(LGKM_CNT) &&
         ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5831abb8071..edcbec68a0f 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4597,37 +4597,37 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
      continue;
 
    case AMDGPU::S_LSHL_B32:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I32:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B32:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHL_B64:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHLREV_B64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I64:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_ASHRREV_I64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B64:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHRREV_B64;
        swapOperands(Inst);
      }
-- 
2.40.0