From: Diana Picus
Date: Thu, 23 Jun 2016 07:47:35 +0000 (+0000)
Subject: [ARM] Do not test for CPUs, use SubtargetFeatures (Part 1). NFCI
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f8f2c7f04e28ea3cb439ad057ccad244b6ce5385;p=llvm

[ARM] Do not test for CPUs, use SubtargetFeatures (Part 1). NFCI

This is a cleanup commit similar to r271555, but for ARM. The end goal is
to get rid of the isSwift / isCortexXY / isWhatever methods. Since the ARM
backend seems to have quite a lot of calls to these methods, I intend to
submit 5-6 subtarget features at a time, instead of one big lump.

Differential Revision: http://reviews.llvm.org/D21432

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273544 91177308-0d34-0410-b5e6-96231b3b80d8
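To make the pattern concrete, here is a minimal self-contained sketch
(plain C++, not LLVM code; the member and accessor names merely mirror
the ones this patch adds) of how a capability-bit query replaces a
CPU-identity check:

// Toy model of the pattern applied here (not LLVM code): the subtarget
// exposes capability bits instead of CPU-identity predicates, so a new
// core opts in by setting a feature instead of being added to every
// isSwift()-style check scattered through the backend.
#include <iostream>

struct SubtargetModel {
  // One bit per behavior, mirroring the new bools in ARMSubtarget.h.
  bool PreferISHST = false;

  bool preferISHSTBarriers() const { return PreferISHST; }
};

// Code that emits a fence queries the capability, never the CPU name.
void emitFenceFor(const SubtargetModel &ST) {
  if (ST.preferISHSTBarriers())
    std::cout << "dmb ishst\n"; // weaker store-only barrier suffices here
  else
    std::cout << "dmb ish\n";
}

int main() {
  SubtargetModel SwiftLike;
  SwiftLike.PreferISHST = true; // what FeaturePrefISHSTBarrier sets via TableGen
  emitFenceFor(SwiftLike);      // prints: dmb ishst

  SubtargetModel GenericV7;
  emitFenceFor(GenericV7);      // prints: dmb ish
  return 0;
}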
---

diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index a336c4f1a68..199d250f4d5 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -106,6 +106,44 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
 def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
                                         "Has zero-cycle zeroing instructions">;
 
+// Whether or not it may be profitable to unpredicate certain instructions
+// during if conversion.
+def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr",
+                                              "IsProfitableToUnpredicate",
+                                              "true",
+                                              "Is profitable to unpredicate">;
+
+// Some targets (e.g. Swift) have microcoded VGETLNi32.
+def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
+                                            "HasSlowVGETLNi32", "true",
+                                            "Has slow VGETLNi32 - prefer VMOV">;
+
+// Some targets (e.g. Swift) have microcoded VDUP32.
+def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true",
+                                         "Has slow VDUP32 - prefer VMOV">;
+
+// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON
+// for scalar FP, as this allows more effective execution domain optimization.
+def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
+                                           "true", "Prefer VMOVSR">;
+
+// Swift has ISHST barriers compatible with Atomic Release semantics but weaker
+// than ISH
+def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
+                                               "true", "Prefer ISHST barriers">;
+
+// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
+// VFP to NEON, as an execution domain optimization.
+def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs",
+                                            "true",
+                                            "Convert VMOVSR, VMOVRS, VMOVS to NEON">;
+
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations. This affects instruction selection and should
+// only be enabled if the handling of denormals is not important.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+                                        "true",
+                                        "Use NEON for single precision FP">;
+
 // Some processors have FP multiply-accumulate instructions that don't
 // play nicely with other VFP / NEON instructions, and it's generally better
 // to just not use them.
@@ -117,12 +155,6 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
                                              "HasVMLxForwarding", "true",
                                              "Has multiplier accumulator forwarding">;
 
-// Some processors benefit from using NEON instructions for scalar
-// single-precision FP operations.
-def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
-                                        "true",
-                                        "Use NEON for single precision FP">;
-
 // Disable 32-bit to 16-bit narrowing for experimentation.
 def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
                                              "Prefer 32-bit Thumb instrs">;
@@ -533,6 +565,8 @@ def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
                                                   FeatureT2XtPk,
                                                   FeatureFP16,
                                                   FeatureAvoidPartialCPSR,
+                                                  FeaturePreferVMOVSR,
+                                                  FeatureNEONForFPMovs,
                                                   FeatureMP]>;
 
 // FIXME: A12 has currently the same Schedule model as A9
@@ -596,7 +630,11 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
                                            FeatureHWDivARM,
                                            FeatureAvoidPartialCPSR,
                                            FeatureAvoidMOVsShOp,
-                                           FeatureHasSlowFPVMLx]>;
+                                           FeatureHasSlowFPVMLx,
+                                           FeatureProfUnpredicate,
+                                           FeaturePrefISHSTBarrier,
+                                           FeatureSlowVGETLNi32,
+                                           FeatureSlowVDUP32]>;
 
 // FIXME: R4 has currently the same ProcessorModel as A8.
 def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index f9363497d0b..6e8028efd64 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1766,9 +1766,9 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
 bool
 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                             MachineBasicBlock &FMBB) const {
-  // Reduce false anti-dependencies to let Swift's out-of-order execution
+  // Reduce false anti-dependencies to let the target's out-of-order execution
   // engine do its thing.
-  return Subtarget.isSwift();
+  return Subtarget.isProfitableToUnpredicate();
 }
 
 /// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -4178,7 +4178,7 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
 
   // CortexA9 is particularly picky about mixing the two and wants these
   // converted.
-  if (Subtarget.isCortexA9() && !isPredicated(*MI) &&
+  if (Subtarget.useNEONForFPMovs() && !isPredicated(*MI) &&
       (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
        MI->getOpcode() == ARM::VMOVS))
     return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e71dc0f14c1..4c575bd9f1f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3024,7 +3024,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
   if (Subtarget->isMClass()) {
     // Only a full system barrier exists in the M-class architectures.
     Domain = ARM_MB::SY;
-  } else if (Subtarget->isSwift() && Ord == AtomicOrdering::Release) {
+  } else if (Subtarget->preferISHSTBarriers() &&
+             Ord == AtomicOrdering::Release) {
     // Swift happens to implement ISHST barriers in a way that's compatible with
     // Release semantics but weaker than ISH so we'd be fools not to use
     // it. Beware: other processors probably don't!
@@ -12236,7 +12237,7 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
     /*FALLTHROUGH*/
   case AtomicOrdering::Release:
   case AtomicOrdering::AcquireRelease:
-    if (Subtarget->isSwift())
+    if (Subtarget->preferISHSTBarriers())
       return makeDMB(Builder, ARM_MB::ISHST);
     // FIXME: add a comment with a link to documentation justifying this.
     else
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index b57f3e84d3b..64ee125e1df 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -321,19 +321,16 @@ def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion =="
                                 " Subtarget->hasVFP4()) || "
                                 "Subtarget->isTargetDarwin()">;
 
-// VGETLNi32 is microcoded on Swift - prefer VMOV.
-def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
-def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
-
-// VDUP.32 is microcoded on Swift - prefer VMOV.
-def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
-def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
-
-// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
-// this allows more effective execution domain optimization. See
-// setExecutionDomain().
-def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
-def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
+def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
+
+def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
+
+def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
+                          "!Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
+                              "Subtarget->useNEONForSinglePrecisionFP()">;
 
 def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
 def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 5f0a4897318..0cc57a248b7 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -154,6 +154,12 @@ void ARMSubtarget::initializeEnvironment() {
   HasCRC = false;
   HasRAS = false;
   HasZeroCycleZeroing = false;
+  IsProfitableToUnpredicate = false;
+  HasSlowVGETLNi32 = false;
+  HasSlowVDUP32 = false;
+  PreferVMOVSR = false;
+  PreferISHST = false;
+  UseNEONForFPMovs = false;
   StrictAlign = false;
   HasDSP = false;
   UseNaClTrap = false;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 1eeb6224206..3c1797d858a 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -218,6 +218,24 @@ protected:
   /// particularly effective at zeroing a VFP register.
   bool HasZeroCycleZeroing;
 
+  /// If true, if conversion may decide to leave some instructions unpredicated.
+  bool IsProfitableToUnpredicate;
+
+  /// If true, VMOV will be favored over VGETLNi32.
+  bool HasSlowVGETLNi32;
+
+  /// If true, VMOV will be favored over VDUP.
+  bool HasSlowVDUP32;
+
+  /// If true, VMOVSR will be favored over VMOVDRR.
+  bool PreferVMOVSR;
+
+  /// If true, ISHST barriers will be used for Release semantics.
+  bool PreferISHST;
+
+  /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
+  bool UseNEONForFPMovs;
+
   /// StrictAlign - If true, the subtarget disallows unaligned memory
   /// accesses for some types. For details, see
   /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
@@ -376,6 +394,12 @@ public:
   bool hasTrustZone() const { return HasTrustZone; }
   bool has8MSecExt() const { return Has8MSecExt; }
   bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+  bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
+  bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
+  bool hasSlowVDUP32() const { return HasSlowVDUP32; }
+  bool preferVMOVSR() const { return PreferVMOVSR; }
+  bool preferISHSTBarriers() const { return PreferISHST; }
+  bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
   bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
   bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
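A practical consequence of the change: each behavior should now be reachable
through the usual -mattr mechanism, independently of -mcpu, since the
SubtargetFeature strings defined above in ARM.td double as attribute names.
A hypothetical, untested invocation:

  llc -mtriple=armv7a-none-eabi -mattr=+prof-unpr,+slow-vgetlni32,+slow-vdup32 test.ll

Previously these behaviors could only be obtained together, by selecting
-mcpu=swift.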