From: Diana Picus Date: Wed, 6 Jul 2016 11:22:11 +0000 (+0000) Subject: [ARM] Do not test for CPUs, use SubtargetFeatures. Also remove 2 flags. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=06a4440843d49ebc9acdb16305dbec06f5993837;p=llvm [ARM] Do not test for CPUs, use SubtargetFeatures. Also remove 2 flags. This is a follow-up for r273544. The end goal is to get rid of the isSwift / isCortexXY / isWhatever methods. This commit also removes two command-line flags that weren't used in any of the tests: widen-vmovs and swift-partial-update-clearance. The former may be easily replaced with the mattr mechanism, but the latter may not (as it is a subtarget property, and not a proper feature). Differential Revision: http://reviews.llvm.org/D21797 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274620 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 9f8bc3146c7..10246167f74 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -145,6 +145,10 @@ def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", "SlowLoadDSubregister", "true", "Loading into D subregs is slow">; +// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. +def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", + "DontWidenVMOVS", "true", + "Don't widen VMOVS to VMOVD">; // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from // VFP to NEON, as an execution domain optimization. @@ -612,6 +616,7 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, // FIXME: A15 has currently the same Schedule model as A9. def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, + FeatureDontWidenVMOVS, FeatureHasRetAddrStack, FeatureMuxedUnits, FeatureTrustZone, diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9ed2f595d46..a1b16525975 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -51,15 +51,6 @@ static cl::opt EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); -static cl::opt -WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), - cl::desc("Widen ARM vmovs to vmovd when possible")); - -static cl::opt -SwiftPartialUpdateClearance("swift-partial-update-clearance", - cl::Hidden, cl::init(12), - cl::desc("Clearance before partial register updates")); - /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { uint16_t MLxOpc; // MLA / MLS opcode @@ -1305,8 +1296,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI.isCopy() || Subtarget.isCortexA15() || - Subtarget.isFPOnlySP()) + if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP()) return false; // Look for a copy between even S-registers. That is where we keep floats @@ -4492,8 +4482,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { - if (!SwiftPartialUpdateClearance || - !(Subtarget.isSwift() || Subtarget.isCortexA15())) + auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance(); + if (!PartialUpdateClearance) return 0; assert(TRI && "Need TRI instance"); @@ -4545,7 +4535,7 @@ unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( // MI has an unwanted D-register dependency. // Avoid defs in the previous N instructrions. - return SwiftPartialUpdateClearance; + return PartialUpdateClearance; } // Break a partial register dependency after getPartialRegUpdateClearance diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 851eaf5e65d..1d7eef9ddcf 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -219,6 +219,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { case CortexA15: MaxInterleaveFactor = 2; PreISelOperandLatencyAdjustment = 1; + PartialUpdateClearance = 12; break; case CortexA17: case CortexA32: @@ -241,6 +242,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { MaxInterleaveFactor = 2; LdStMultipleTiming = SingleIssuePlusExtras; PreISelOperandLatencyAdjustment = 1; + PartialUpdateClearance = 12; break; } } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index ebd8daefdab..72dae746aec 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -259,6 +259,9 @@ protected: /// If true, the AGU and NEON/FPU units are multiplexed. bool HasMuxedUnits = false; + /// If true, VMOVS will never be widened to VMOVD + bool DontWidenVMOVS = false; + /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. bool UseNEONForFPMovs = false; @@ -302,6 +305,9 @@ protected: unsigned MaxInterleaveFactor = 1; + /// Clearance before partial register updates (in number of instructions) + unsigned PartialUpdateClearance = 0; + /// What kind of timing do load multiple/store multiple have (double issue, /// single issue etc). ARMLdStMultipleTiming LdStMultipleTiming = SingleIssue; @@ -448,6 +454,7 @@ public: bool hasSlowOddRegister() const { return SlowOddRegister; } bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; } bool hasMuxedUnits() const { return HasMuxedUnits; } + bool dontWidenVMOVS() const { return DontWidenVMOVS; } bool useNEONForFPMovs() const { return UseNEONForFPMovs; } bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; } bool nonpipelinedVFP() const { return NonpipelinedVFP; } @@ -591,6 +598,8 @@ public: unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } + unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; } + ARMLdStMultipleTiming getLdStMultipleTiming() const { return LdStMultipleTiming; } diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll index 316cfabab48..2abf8d9701f 100644 --- a/test/CodeGen/ARM/widen-vmovs.ll +++ b/test/CodeGen/ARM/widen-vmovs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s +; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s target triple = "thumbv7-apple-ios" ; The 1.0e+10 constant is loaded from the constant pool and kept in a register.