Summary:
Currently we determine whether macro fusion is supported based on the AVX flag, as a proxy for the processor being Sandy Bridge or newer.
This is odd now that AMD processors also support AVX. It also means that if the user explicitly disables AVX, we disable macro fusion too.
This patch adds an explicit macro fusion feature. I've also enabled it for the generic 64-bit CPU (which doesn't have AVX).
This is probably another candidate for being in the MI layer, but for now I at least wanted to correct the overloading of the AVX feature.
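For illustration, a minimal sketch of the new gating (simplified; the helper name here is made up, the real change is in X86MacroFusion.cpp below, and the per-opcode pairing checks are unchanged):

  // Sketch only: fusion is gated on the explicit subtarget feature
  // rather than being inferred from AVX support.
  static bool shouldAttemptFusion(const X86Subtarget &ST) {
    // Processors that don't set FeatureMacroFusion never fuse.
    if (!ST.hasMacroFusion())
      return false;
    // ... compare/branch pairing checks follow as before ...
    return true;
  }

Since this is a regular SubtargetFeature named "macrofusion", it can also be toggled independently of AVX, e.g. via -mattr=+macrofusion or -mattr=-macrofusion.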
Reviewers: spatel, chandlerc, RKSimon, zvi
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D37280
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312097 91177308-0d34-0410-b5e6-96231b3b80d8
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
+// Sandy Bridge and newer processors have many instructions that can be
+// fused with conditional branches and pass through the CPU as a single
+// operation.
+def FeatureMacroFusion
+ : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
+ "Various instructions can be fused with conditional branches">;
+
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Atom CPUs.
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : WestmereProc<"westmere">;
FeatureLAHFSAHF,
FeatureSlow3OpsLEA,
FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate
+ FeatureFastSHLDRotate,
+ FeatureMacroFusion
]>;
class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
FeatureXSAVE,
FeatureLWP,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Piledriver
def : Proc<"bdver2", [
FeatureLWP,
FeatureFMA,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Steamroller
FeatureXSAVEOPT,
FeatureSlowSHLD,
FeatureFSGSBase,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Excavator
FeatureSlowSHLD,
FeatureFSGSBase,
FeatureLAHFSAHF,
- FeatureMWAITX
+ FeatureMWAITX,
+ FeatureMacroFusion
]>;
// Znver1
FeatureFastLZCNT,
FeatureLAHFSAHF,
FeatureLZCNT,
+ FeatureMacroFusion,
FeatureMMX,
FeatureMOVBE,
FeatureMWAITX,
Feature64Bit,
FeatureSlow3OpsLEA,
FeatureSlowBTMem,
- FeatureSlowIncDec
+ FeatureSlowIncDec,
+ FeatureMacroFusion
]>;
//===----------------------------------------------------------------------===//
const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);
- // Check if this processor supports macro-fusion. Since this is a minor
- // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
- // proxy for SandyBridge+.
- if (!ST.hasAVX())
+ // Check if this processor supports macro-fusion.
+ if (!ST.hasMacroFusion())
return false;
enum {
HasFastVectorFSQRT = false;
HasFastLZCNT = false;
HasFastSHLDRotate = false;
+ HasMacroFusion = false;
HasERMSB = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
/// True if SHLD based rotate is fast.
bool HasFastSHLDRotate;
+ /// True if the processor supports macrofusion.
+ bool HasMacroFusion;
+
/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB;
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+ bool hasMacroFusion() const { return HasMacroFusion; }
bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
;
; X64-LABEL: select00:
; X64: # BB#0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: je .LBB0_2
; X64-NEXT: # BB#1:
; X64-NEXT: vmovaps %ymm0, %ymm1
;
; X64-LABEL: select01:
; X64: # BB#0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: je .LBB1_2
; X64-NEXT: # BB#1:
; X64-NEXT: vmovaps %ymm0, %ymm1
; CHECK-LABEL: funcE:
; CHECK: # BB#0: # %for_exit499
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: jne .LBB4_2
; CHECK-NEXT: # BB#1: # %load.i1247
; CHECK-NEXT: pushq %rbp
;
; AVX512BW-LABEL: test8:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: cmpl %esi, %edi
+; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: jg LBB17_1
; AVX512BW-NEXT: ## BB#2:
; AVX512BW-NEXT: vpcmpltud %zmm2, %zmm1, %k0
;
; AVX512DQ-LABEL: test8:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: cmpl %esi, %edi
+; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: jg LBB17_1
; AVX512DQ-NEXT: ## BB#2:
; AVX512DQ-NEXT: vpcmpltud %zmm2, %zmm1, %k0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB39_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
+; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB39_8
; VEX-NEXT: # BB#7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB41_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
+; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB41_8
; VEX-NEXT: # BB#7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
; CHECK-LABEL: Transform
; CHECK-NOT: cmov
; CHECK: divl [[a:%[0-9a-z]*]]
-; CHECK: cmpl [[a]], %eax
; CHECK: movl $11, [[s1:%[0-9a-z]*]]
; CHECK: movl [[a]], [[s2:%[0-9a-z]*]]
+; CHECK: cmpl [[a]], %edx
; CHECK: ja [[SinkBB:.*]]
; CHECK: [[FalseBB:.*]]:
; CHECK: movl $22, [[s1]]