def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
"true", "Prefer ISHST barriers">;
+// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU.
+def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true",
+ "Has muxed AGU and NEON/FPU">;
+
+// On some targets, a VLDM/VSTM starting with an odd register number needs more
+// microops than single VLDRS.
+def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister",
+ "true", "VLDM/VSTM starting with an odd register is slow">;
+
+// Some targets have a renaming dependency when loading into D subregisters.
+def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
+ "SlowLoadDSubregister", "true",
+ "Loading into D subregs is slow">;
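+// Like any SubtargetFeature, the features above can be toggled for testing
+// via -mattr (e.g. "llc -mattr=+muxed-units,+slow-odd-reg"; illustrative
+// invocation using the feature strings defined above).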
+
// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
// VFP to NEON, as an execution domain optimization.
def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs",
FeatureFP16,
FeatureAvoidPartialCPSR,
FeaturePreferVMOVSR,
+ FeatureMuxedUnits,
FeatureNEONForFPMovs,
FeatureCheckVLDnAlign,
FeatureMP]>;
// FIXME: A15 currently has the same Schedule model as A9.
def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureHasRetAddrStack,
+ FeatureMuxedUnits,
FeatureTrustZone,
FeatureT2XtPk,
FeatureVFP4,
// division features.
def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureHasRetAddrStack,
+ FeatureMuxedUnits,
FeatureCheckVLDnAlign,
FeatureVMLxForwarding,
FeatureT2XtPk,
FeatureHasSlowFPVMLx,
FeatureProfUnpredicate,
FeaturePrefISHSTBarrier,
+ FeatureSlowOddRegister,
+ FeatureSlowLoadDSubreg,
FeatureSlowVGETLNi32,
FeatureSlowVDUP32]>;
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
- // On A9, AGU and NEON/FPU are muxed.
- !(TII.getSubtarget().isLikeA9() && LastMI->mayLoadOrStore()) &&
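+      // On targets with muxed AGU and NEON/FPU (see FeatureMuxedUnits),
+      // loads and stores contend with NEON/FPU ops, so don't skip over them.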
+ !(TII.getSubtarget().hasMuxedUnits() && LastMI->mayLoadOrStore()) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
bool CanMergeToLSMulti = true;
-    // On swift vldm/vstm starting with an odd register number as that needs
-    // more uops than single vldrs.
+    // On targets where a vldm/vstm starting with an odd register number
+    // (e.g. s1) needs more microops than single vldrs, don't merge.
-    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
+    if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
CanMergeToLSMulti = false;
// LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
/// If true, ISHST barriers will be used for Release semantics.
bool PreferISHST = false;
+ /// If true, a VLDM/VSTM starting with an odd register number is considered to
+ /// take more microops than single VLDRS/VSTRS.
+ bool SlowOddRegister = false;
+
+ /// If true, loading into a D subregister will be penalized.
+ bool SlowLoadDSubregister = false;
+
+ /// If true, the AGU and NEON/FPU units are multiplexed.
+ bool HasMuxedUnits = false;
+
/// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
bool UseNEONForFPMovs = false;
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
+ /// @{
+  /// These functions are obsolete; please consider adding subtarget features
+ /// or properties instead of calling them.
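+  /// (For example, prefer checking hasMuxedUnits() over isLikeA9() when the
+  /// muxed AGU and NEON/FPU behaviour is what actually matters.)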
bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA7() const { return ARMProcFamily == CortexA7; }
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
bool isCortexR5() const { return ARMProcFamily == CortexR5; }
bool isKrait() const { return ARMProcFamily == Krait; }
+ /// @}
bool hasARMOps() const { return !NoARM; }
bool hasSlowVDUP32() const { return HasSlowVDUP32; }
bool preferVMOVSR() const { return PreferVMOVSR; }
bool preferISHSTBarriers() const { return PreferISHST; }
+ bool hasSlowOddRegister() const { return SlowOddRegister; }
+ bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
+ bool hasMuxedUnits() const { return HasMuxedUnits; }
bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
bool nonpipelinedVFP() const { return NonpipelinedVFP; }
unsigned Index) {
-  // Penalize inserting into an D-subregister. We end up with a three times
-  // lower estimated throughput on swift.
-  if (ST->isSwift() &&
-      Opcode == Instruction::InsertElement &&
-      ValTy->isVectorTy() &&
-      ValTy->getScalarSizeInBits() <= 32)
+  // Penalize inserting into a D-subregister (e.g. writing an i32 lane of a
+  // <4 x i32>): on targets such as Swift this ends up with about three times
+  // lower estimated throughput.
+  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
+      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
return 3;
if ((Opcode == Instruction::InsertElement ||