[X86] Prevent fast isel from folding loads into the instructions listed in hasPartialRegUpdate

author Craig Topper <craig.topper@intel.com>

Wed, 1 Nov 2017 18:10:06 +0000 (18:10 +0000)

committer Craig Topper <craig.topper@intel.com>

Wed, 1 Nov 2017 18:10:06 +0000 (18:10 +0000)
author Craig Topper <craig.topper@intel.com>
Wed, 1 Nov 2017 18:10:06 +0000 (18:10 +0000)
committer Craig Topper <craig.topper@intel.com>
Wed, 1 Nov 2017 18:10:06 +0000 (18:10 +0000)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 276ceae7cdc56ad8b0af1de9a9bb9ad375fb5987..a2ec1f4f469148ebb7232378faa99ef9398a93e4 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -8389,6 +8389,11 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
         MI.getOpcode() == X86::PUSH64r))
      return nullptr;
  
+  // Avoid partial register update stalls unless optimizing for size.
+  // TODO: we should block undef reg update as well.
+  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
+    return nullptr;
+
    unsigned NumOps = MI.getDesc().getNumOperands();
    bool isTwoAddr =
        NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
@@ -8554,6 +8559,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
  
    // Unless optimizing for size, don't fold to avoid partial
    // register update stalls
+  // TODO: we should block undef reg update as well.
    if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
      return nullptr;
  
@@ -8752,6 +8758,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
    if (NoFusing) return nullptr;
  
    // Avoid partial register update stalls unless optimizing for size.
+  // TODO: we should block undef reg update as well.
    if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
      return nullptr;
  
diff --git a/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll b/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll

index 1035c2567907f1b7380544253af153a3a8014b24..4a3337554b61085b11de7ad40b9ef9c3769d0145 100644 (file)
--- a/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
+++ b/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
@@ -55,7 +55,8 @@ entry:
  define double @single_to_double_rm(float* %x) {
  ; SSE-LABEL: single_to_double_rm:
  ; SSE:       # BB#0: # %entry
-; SSE-NEXT:    cvtss2sd (%rdi), %xmm0
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    cvtss2sd %xmm0, %xmm0
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: single_to_double_rm:
@@ -69,10 +70,28 @@ entry:
    ret double %conv
  }
  
+define double @single_to_double_rm_optsize(float* %x) optsize {
+; SSE-LABEL: single_to_double_rm_optsize:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    cvtss2sd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: single_to_double_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = load float, float* %x, align 4
+  %conv = fpext float %0 to double
+  ret double %conv
+}
+
  define float @double_to_single_rm(double* %x) {
  ; SSE-LABEL: double_to_single_rm:
  ; SSE:       # BB#0: # %entry
-; SSE-NEXT:    cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    cvtsd2ss %xmm0, %xmm0
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: double_to_single_rm:
@@ -85,3 +104,20 @@ entry:
    %conv = fptrunc double %0 to float
    ret float %conv
  }
+
+define float @double_to_single_rm_optsize(double* %x) optsize {
+; SSE-LABEL: double_to_single_rm_optsize:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: double_to_single_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = load double, double* %x, align 8
+  %conv = fptrunc double %0 to float
+  ret float %conv
+}
diff --git a/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll b/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll

index 432e190a745435dd5dad32ada094df0fa37e5028..50eddab2b452847912b4022cac5ada1c4ba4c545 100644 (file)
--- a/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
@@ -21,7 +21,8 @@ entry:
  define double @long_to_double_rm(i64* %a) {
  ; SSE2-LABEL: long_to_double_rm:
  ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT:    movq (%rdi), %rax
+; SSE2-NEXT:    cvtsi2sdq %rax, %xmm0
  ; SSE2-NEXT:    retq
  ;
  ; AVX-LABEL: long_to_double_rm:
@@ -34,6 +35,22 @@ entry:
    ret double %1
  }
  
+define double @long_to_double_rm_optsize(i64* %a) optsize {
+; SSE2-LABEL: long_to_double_rm_optsize:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: long_to_double_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = sitofp i64 %0 to double
+  ret double %1
+}
+
  define float @long_to_float_rr(i64 %a) {
  ; SSE2-LABEL: long_to_float_rr:
  ; SSE2:       # BB#0: # %entry
@@ -52,7 +69,8 @@ entry:
  define float @long_to_float_rm(i64* %a) {
  ; SSE2-LABEL: long_to_float_rm:
  ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT:    movq (%rdi), %rax
+; SSE2-NEXT:    cvtsi2ssq %rax, %xmm0
  ; SSE2-NEXT:    retq
  ;
  ; AVX-LABEL: long_to_float_rm:
@@ -64,3 +82,19 @@ entry:
    %1 = sitofp i64 %0 to float
    ret float %1
  }
+
+define float @long_to_float_rm_optsize(i64* %a) optsize {
+; SSE2-LABEL: long_to_float_rm_optsize:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: long_to_float_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = sitofp i64 %0 to float
+  ret float %1
+}
diff --git a/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/test/CodeGen/X86/fast-isel-int-float-conversion.ll

index d722d37ee6bdf9c18227a9d3dd345531e8be6be2..57b50abab535cf3a7ed5055515ca5ab820bd428e 100644 (file)
--- a/test/CodeGen/X86/fast-isel-int-float-conversion.ll
+++ b/test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -58,7 +58,8 @@ entry:
  define double @int_to_double_rm(i32* %a) {
  ; SSE2-LABEL: int_to_double_rm:
  ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT:    movl (%rdi), %eax
+; SSE2-NEXT:    cvtsi2sdl %eax, %xmm0
  ; SSE2-NEXT:    retq
  ;
  ; AVX-LABEL: int_to_double_rm:
@@ -107,6 +108,58 @@ entry:
    ret double %1
  }
  
+define double @int_to_double_rm_optsize(i32* %a) optsize {
+; SSE2-LABEL: int_to_double_rm_optsize:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: int_to_double_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; SSE2_X86-LABEL: int_to_double_rm_optsize:
+; SSE2_X86:       # BB#0: # %entry
+; SSE2_X86-NEXT:    pushl %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT:    .cfi_offset %ebp, -8
+; SSE2_X86-NEXT:    movl %esp, %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa_register %ebp
+; SSE2_X86-NEXT:    andl $-8, %esp
+; SSE2_X86-NEXT:    subl $8, %esp
+; SSE2_X86-NEXT:    movl 8(%ebp), %eax
+; SSE2_X86-NEXT:    cvtsi2sdl (%eax), %xmm0
+; SSE2_X86-NEXT:    movsd %xmm0, (%esp)
+; SSE2_X86-NEXT:    fldl (%esp)
+; SSE2_X86-NEXT:    movl %ebp, %esp
+; SSE2_X86-NEXT:    popl %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa %esp, 4
+; SSE2_X86-NEXT:    retl
+;
+; AVX_X86-LABEL: int_to_double_rm_optsize:
+; AVX_X86:       # BB#0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtsi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = sitofp i32 %0 to double
+  ret double %1
+}
+
  define float @int_to_float_rr(i32 %a) {
  ; SSE2-LABEL: int_to_float_rr:
  ; SSE2:       # BB#0: # %entry
@@ -148,7 +201,8 @@ entry:
  define float @int_to_float_rm(i32* %a) {
  ; SSE2-LABEL: int_to_float_rm:
  ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT:    movl (%rdi), %eax
+; SSE2-NEXT:    cvtsi2ssl %eax, %xmm0
  ; SSE2-NEXT:    retq
  ;
  ; AVX-LABEL: int_to_float_rm:
@@ -184,3 +238,43 @@ entry:
    %1 = sitofp i32 %0 to float
    ret float %1
  }
+
+define float @int_to_float_rm_optsize(i32* %a) optsize {
+; SSE2-LABEL: int_to_float_rm_optsize:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: int_to_float_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; SSE2_X86-LABEL: int_to_float_rm_optsize:
+; SSE2_X86:       # BB#0: # %entry
+; SSE2_X86-NEXT:    pushl %eax
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2_X86-NEXT:    cvtsi2ssl (%eax), %xmm0
+; SSE2_X86-NEXT:    movss %xmm0, (%esp)
+; SSE2_X86-NEXT:    flds (%esp)
+; SSE2_X86-NEXT:    popl %eax
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 4
+; SSE2_X86-NEXT:    retl
+;
+; AVX_X86-LABEL: int_to_float_rm_optsize:
+; AVX_X86:       # BB#0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtsi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = sitofp i32 %0 to float
+  ret float %1
+}
author	Craig Topper <craig.topper@intel.com>
	Wed, 1 Nov 2017 18:10:06 +0000 (18:10 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Wed, 1 Nov 2017 18:10:06 +0000 (18:10 +0000)
lib/Target/X86/X86InstrInfo.cpp		patch \| blob \| history
test/CodeGen/X86/fast-isel-fptrunc-fpext.ll		patch \| blob \| history
test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll		patch \| blob \| history
test/CodeGen/X86/fast-isel-int-float-conversion.ll		patch \| blob \| history