"true",
"Use NEON for single precision FP">;
+// On some processors, VLDn instructions that access unaligned data take one
+// extra cycle. Take that into account when computing operand latencies.
+def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign",
+ "true",
+ "Check for VLDn unaligned access">;
+
+// Some processors have a nonpipelined VFP coprocessor.
+def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
+ "NonpipelinedVFP", "true",
+ "VFP instructions are not pipelined">;
+
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
// to just not use them.
def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
FeatureHasRetAddrStack,
+ FeatureNonpipelinedVFP,
FeatureTrustZone,
FeatureSlowFPBrcc,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR,
FeaturePreferVMOVSR,
FeatureNEONForFPMovs,
+ FeatureCheckVLDnAlign,
FeatureMP]>;
// FIXME: A12 currently has the same schedule model as A9
FeatureT2XtPk,
FeatureVFP4,
FeatureMP,
+ FeatureCheckVLDnAlign,
FeatureHWDiv,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
// division features.
def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureHasRetAddrStack,
+ FeatureCheckVLDnAlign,
FeatureVMLxForwarding,
FeatureT2XtPk,
FeatureFP16,
return Size / 4;
}
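+// Return the number of micro-ops for a load/store multiple on a subtarget
+// with SingleIssuePlusExtras timing: one per register transferred, one for
+// address computation, and extras for base register writeback and pc writes.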
+static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
+ unsigned NumRegs) {
+  unsigned UOps = 1 + NumRegs; // 1 for address computation, 1 per register.
+ switch (Opc) {
+ default:
+ break;
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ ++UOps; // One for base register writeback.
+ break;
+ case ARM::LDMIA_RET:
+ case ARM::tPOP_RET:
+ case ARM::t2LDMIA_RET:
+ UOps += 2; // One for base reg wb, one for write to pc.
+ break;
+ }
+ return UOps;
+}
+
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr *MI) const {
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
- if (Subtarget.isSwift()) {
- int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
- switch (Opc) {
- default: break;
- case ARM::VLDMDIA_UPD:
- case ARM::VLDMDDB_UPD:
- case ARM::VLDMSIA_UPD:
- case ARM::VLDMSDB_UPD:
- case ARM::VSTMDIA_UPD:
- case ARM::VSTMDDB_UPD:
- case ARM::VSTMSIA_UPD:
- case ARM::VSTMSDB_UPD:
- case ARM::LDMIA_UPD:
- case ARM::LDMDA_UPD:
- case ARM::LDMDB_UPD:
- case ARM::LDMIB_UPD:
- case ARM::STMIA_UPD:
- case ARM::STMDA_UPD:
- case ARM::STMDB_UPD:
- case ARM::STMIB_UPD:
- case ARM::tLDMIA_UPD:
- case ARM::tSTMIA_UPD:
- case ARM::t2LDMIA_UPD:
- case ARM::t2LDMDB_UPD:
- case ARM::t2STMIA_UPD:
- case ARM::t2STMDB_UPD:
- ++UOps; // One for base register writeback.
- break;
- case ARM::LDMIA_RET:
- case ARM::tPOP_RET:
- case ARM::t2LDMIA_RET:
- UOps += 2; // One for base reg wb, one for write to pc.
- break;
- }
- return UOps;
- } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
+ switch (Subtarget.getLdStMultipleTiming()) {
+ case ARMSubtarget::SingleIssuePlusExtras:
+ return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
+ case ARMSubtarget::SingleIssue:
+ // Assume the worst.
+ return NumRegs;
+ case ARMSubtarget::DoubleIssue: {
if (NumRegs < 4)
return 2;
// 4 registers would be issued: 2, 2.
// 5 registers would be issued: 2, 2, 1.
- int A8UOps = (NumRegs / 2);
+ unsigned UOps = (NumRegs / 2);
if (NumRegs % 2)
- ++A8UOps;
- return A8UOps;
- } else if (Subtarget.isLikeA9()) {
- int A9UOps = (NumRegs / 2);
+ ++UOps;
+ return UOps;
+ }
+ case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
+ unsigned UOps = (NumRegs / 2);
        // If there is an odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
- if ((NumRegs % 2) ||
- !MI->hasOneMemOperand() ||
+ if ((NumRegs % 2) || !MI->hasOneMemOperand() ||
(*MI->memoperands_begin())->getAlignment() < 8)
- ++A9UOps;
- return A9UOps;
- } else {
- // Assume the worst.
- return NumRegs;
+ ++UOps;
+ return UOps;
+ }
}
}
}
+ llvm_unreachable("Didn't find the number of microops");
}
int
}
}
- if (DefAlign < 8 && Subtarget.isLikeA9()) {
+ if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
if (!UseNode->isMachineOpcode()) {
int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
- if (Subtarget.isLikeA9() || Subtarget.isSwift())
- return Latency <= 2 ? 1 : Latency - 1;
- else
- return Latency <= 3 ? 1 : Latency - 2;
+ int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
+ int Threshold = 1 + Adj;
+ return Latency <= Threshold ? 1 : Latency - Adj;
}
const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
}
}
- if (DefAlign < 8 && Subtarget.isLikeA9())
+ if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLD1q8:
const MachineInstr *UseMI, unsigned UseIdx) const {
unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
- if (Subtarget.isCortexA8() &&
+ if (Subtarget.nonpipelinedVFP() &&
(DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
- // CortexA8 VFP instructions are not pipelined.
return true;
// Hoist VFP / NEON instructions with 4 or higher latency.
Has8MSecExt(false), HasCrypto(false), HasCRC(false), HasRAS(false),
HasZeroCycleZeroing(false), IsProfitableToUnpredicate(false),
HasSlowVGETLNi32(false), HasSlowVDUP32(false), PreferVMOVSR(false),
- PreferISHST(false), UseNEONForFPMovs(false), StrictAlign(false),
- RestrictIT(false), HasDSP(false), UseNaClTrap(false), GenLongCalls(false),
+ PreferISHST(false), UseNEONForFPMovs(false), CheckVLDnAlign(false),
+ NonpipelinedVFP(false), StrictAlign(false), RestrictIT(false),
+ HasDSP(false), UseNaClTrap(false), GenLongCalls(false),
UnsafeFPMath(false), UseSjLjEH(false), stackAlignment(4), CPUString(CPU),
- IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
+ MaxInterleaveFactor(1), LdStMultipleTiming(SingleIssue),
+ PreISelOperandLatencyAdjustment(2), IsLittle(IsLittle), TargetTriple(TT),
+ Options(TM.Options), TM(TM),
FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
(Options.UnsafeFPMath || isTargetDarwin()))
UseNEONForSinglePrecisionFP = true;
+
+ // FIXME: Teach TableGen to deal with these instead of doing it manually here.
+ switch (ARMProcFamily) {
+ case Others:
+ case CortexA5:
+ break;
+ case CortexA7:
+ LdStMultipleTiming = DoubleIssue;
+ break;
+ case CortexA8:
+ LdStMultipleTiming = DoubleIssue;
+ break;
+ case CortexA9:
+ LdStMultipleTiming = DoubleIssueCheckUnalignedAccess;
+ PreISelOperandLatencyAdjustment = 1;
+ break;
+ case CortexA12:
+ break;
+ case CortexA15:
+ MaxInterleaveFactor = 2;
+ PreISelOperandLatencyAdjustment = 1;
+ break;
+ case CortexA17:
+ case CortexA32:
+ case CortexA35:
+ case CortexA53:
+ case CortexA57:
+ case CortexA72:
+ case CortexA73:
+ case CortexR4:
+ case CortexR4F:
+ case CortexR5:
+ case CortexR7:
+ case CortexM3:
+ case ExynosM1:
+ break;
+ case Krait:
+ PreISelOperandLatencyAdjustment = 1;
+ break;
+ case Swift:
+ MaxInterleaveFactor = 2;
+ LdStMultipleTiming = SingleIssuePlusExtras;
+ PreISelOperandLatencyAdjustment = 1;
+ break;
+ }
}
bool ARMSubtarget::isAPCS_ABI() const {
ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline
};
+public:
+  /// Possible timings of load multiple / store multiple instructions.
+ enum ARMLdStMultipleTiming {
+ /// Can load/store 2 registers/cycle.
+ DoubleIssue,
+ /// Can load/store 2 registers/cycle, but needs an extra cycle if the access
+ /// is not 64-bit aligned.
+ DoubleIssueCheckUnalignedAccess,
+ /// Can load/store 1 register/cycle.
+ SingleIssue,
+ /// Can load/store 1 register/cycle, but needs an extra cycle for address
+ /// computation and potentially also for register writeback.
+ SingleIssuePlusExtras,
+ };
+
+protected:
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily;
/// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
bool UseNEONForFPMovs;
+ /// If true, VLDn instructions take an extra cycle for unaligned accesses.
+ bool CheckVLDnAlign;
+
+ /// If true, VFP instructions are not pipelined.
+ bool NonpipelinedVFP;
+
/// StrictAlign - If true, the subtarget disallows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsMisalignedMemoryAccesses().
/// CPUString - String name of used CPU.
std::string CPUString;
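+
+  /// The maximum interleave factor that is profitable on this subtarget,
+  /// reported to the vectorizer via ARMTTIImpl::getMaxInterleaveFactor.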
+ unsigned MaxInterleaveFactor;
+
+  /// The timing of load multiple / store multiple instructions on this
+  /// subtarget (double issue, single issue, etc.).
+ ARMLdStMultipleTiming LdStMultipleTiming;
+
+ /// The adjustment that we need to apply to get the operand latency from the
+ /// operand cycle returned by the itinerary data for pre-ISel operands.
+ int PreISelOperandLatencyAdjustment;
+
/// IsLittle - The target is Little Endian
bool IsLittle;
bool preferVMOVSR() const { return PreferVMOVSR; }
bool preferISHSTBarriers() const { return PreferISHST; }
bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
+ bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
+ bool nonpipelinedVFP() const { return NonpipelinedVFP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
/// function for this subtarget.
unsigned getStackAlignment() const { return stackAlignment; }
+ unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
+
+ ARMLdStMultipleTiming getLdStMultipleTiming() const {
+ return LdStMultipleTiming;
+ }
+
+ int getPreISelOperandLatencyAdjustment() const {
+ return PreISelOperandLatencyAdjustment;
+ }
+
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
/// symbol.
bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
}
unsigned getMaxInterleaveFactor(unsigned VF) {
- // These are out of order CPUs:
- if (ST->isCortexA15() || ST->isSwift())
- return 2;
- return 1;
+ return ST->getMaxInterleaveFactor();
}
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);