This is a follow-up for r273544.
The end goal is to get rid of the isSwift / isCortexXY / isWhatever methods.
This commit also removes two command-line flags that weren't used in any of the
tests: widen-vmovs and swift-partial-update-clearance. The former may be easily
replaced with the mattr mechanism, but the latter may not (as it is a subtarget
property, and not a proper feature).
Differential Revision: http://reviews.llvm.org/D21797
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274620 91177308-0d34-0410-b5e6-96231b3b80d8
// Subtarget feature flag: on some targets, loading into a D subregister is
// slow, so instruction selection should avoid such loads where possible.
// Maps to the SlowLoadDSubregister boolean in ARMSubtarget (queried via
// hasSlowLoadDSubregister()).
def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
"SlowLoadDSubregister", "true",
"Loading into D subregs is slow">;
+// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
+def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
+ "DontWidenVMOVS", "true",
+ "Don't widen VMOVS to VMOVD">;
// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
// VFP to NEON, as an execution domain optimization.
// FIXME: A15 currently has the same Schedule model as A9.
def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
+ FeatureDontWidenVMOVS,
FeatureHasRetAddrStack,
FeatureMuxedUnits,
FeatureTrustZone,
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
-static cl::opt<bool>
-WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
- cl::desc("Widen ARM vmovs to vmovd when possible"));
-
-static cl::opt<unsigned>
-SwiftPartialUpdateClearance("swift-partial-update-clearance",
- cl::Hidden, cl::init(12),
- cl::desc("Clearance before partial register updates"));
-
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
uint16_t MLxOpc; // MLA / MLS opcode
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI.isCopy() || Subtarget.isCortexA15() ||
- Subtarget.isFPOnlySP())
+ if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
return false;
// Look for a copy between even S-registers. That is where we keep floats
unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
const MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const {
- if (!SwiftPartialUpdateClearance ||
- !(Subtarget.isSwift() || Subtarget.isCortexA15()))
+ auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
+ if (!PartialUpdateClearance)
return 0;
assert(TRI && "Need TRI instance");
// MI has an unwanted D-register dependency.
// Avoid defs in the previous N instructions.
- return SwiftPartialUpdateClearance;
+ return PartialUpdateClearance;
}
// Break a partial register dependency after getPartialRegUpdateClearance
case CortexA15:
MaxInterleaveFactor = 2;
PreISelOperandLatencyAdjustment = 1;
+ PartialUpdateClearance = 12;
break;
case CortexA17:
case CortexA32:
MaxInterleaveFactor = 2;
LdStMultipleTiming = SingleIssuePlusExtras;
PreISelOperandLatencyAdjustment = 1;
+ PartialUpdateClearance = 12;
break;
}
}
/// If true, the AGU and NEON/FPU units are multiplexed.
bool HasMuxedUnits = false;
+ /// If true, VMOVS will never be widened to VMOVD
+ bool DontWidenVMOVS = false;
+
/// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
bool UseNEONForFPMovs = false;
unsigned MaxInterleaveFactor = 1;
+ /// Clearance before partial register updates (in number of instructions)
+ unsigned PartialUpdateClearance = 0;
+
/// What kind of timing do load multiple/store multiple have (double issue,
/// single issue etc).
ARMLdStMultipleTiming LdStMultipleTiming = SingleIssue;
bool hasSlowOddRegister() const { return SlowOddRegister; }
bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
bool hasMuxedUnits() const { return HasMuxedUnits; }
+ bool dontWidenVMOVS() const { return DontWidenVMOVS; }
bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
bool nonpipelinedVFP() const { return NonpipelinedVFP; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
+ unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; }
+
ARMLdStMultipleTiming getLdStMultipleTiming() const {
return LdStMultipleTiming;
}
-; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s
target triple = "thumbv7-apple-ios"
; The 1.0e+10 constant is loaded from the constant pool and kept in a register.