[X86] Remove some intrinsic instructions from hasPartialRegUpdate

author Craig Topper <craig.topper@gmail.com>
Mon, 12 Dec 2016 05:07:17 +0000 (05:07 +0000)
committer Craig Topper <craig.topper@gmail.com>
Mon, 12 Dec 2016 05:07:17 +0000 (05:07 +0000)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 1679f99ddeb50c5dc0e3880669b8ee9da41b7afc..ca0004abc4ccc71b33e105525e310fd011f3bea0 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -7242,12 +7242,8 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
    case X86::CVTSI2SD64rm:
    case X86::CVTSD2SSrr:
    case X86::CVTSD2SSrm:
-  case X86::Int_CVTSD2SSrr:
-  case X86::Int_CVTSD2SSrm:
    case X86::CVTSS2SDrr:
    case X86::CVTSS2SDrm:
-  case X86::Int_CVTSS2SDrr:
-  case X86::Int_CVTSS2SDrm:
    case X86::MOVHPDrm:
    case X86::MOVHPSrm:
    case X86::MOVLPDrm:
@@ -7258,12 +7254,8 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
    case X86::RCPSSm_Int:
    case X86::ROUNDSDr:
    case X86::ROUNDSDm:
-  case X86::ROUNDSDr_Int:
-  case X86::ROUNDSDm_Int:
    case X86::ROUNDSSr:
    case X86::ROUNDSSm:
-  case X86::ROUNDSSr_Int:
-  case X86::ROUNDSSm_Int:
    case X86::RSQRTSSr:
    case X86::RSQRTSSm:
    case X86::RSQRTSSr_Int:
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

index 9d1ab922d964f2dacb00d0a412286803b2f6d515..cf10691b2abbdee5a8aeff586f475349e5187004 100644 (file)
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1227,14 +1227,12 @@ define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
  ; X32-LABEL: test_mm_cvtsd_ss_load:
  ; X32:       # BB#0:
  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movaps (%eax), %xmm1
-; X32-NEXT:    cvtsd2ss %xmm1, %xmm0
+; X32-NEXT:    cvtsd2ss (%eax), %xmm0
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test_mm_cvtsd_ss_load:
  ; X64:       # BB#0:
-; X64-NEXT:    movaps (%rdi), %xmm1
-; X64-NEXT:    cvtsd2ss %xmm1, %xmm0
+; X64-NEXT:    cvtsd2ss (%rdi), %xmm0
  ; X64-NEXT:    retq
    %a1 = load <2 x double>, <2 x double>* %p1
    %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll

index 747bee127e3d1f650c5d715ed9986b384e97cef0..694e303e635eb8e5544e1e76f751bb8872d0b9db 100644 (file)
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -370,8 +370,7 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %
  ; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
  ; SSE:       ## BB#0:
  ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT:    movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
+; SSE-NEXT:    cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
  ; SSE-NEXT:    retl ## encoding: [0xc3]
  ;
  ; VCHECK-LABEL: test_x86_sse2_cvtsd2ss_load:
@@ -444,8 +443,7 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>*
  ; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
  ; SSE:       ## BB#0:
  ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT:    movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
+; SSE-NEXT:    cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
  ; SSE-NEXT:    retl ## encoding: [0xc3]
  ;
  ; VCHECK-LABEL: test_x86_sse2_cvtss2sd_load:
diff --git a/test/CodeGen/X86/sse41-intrinsics-x86.ll b/test/CodeGen/X86/sse41-intrinsics-x86.ll

index b77f472faf340c257a8169c3dc19826559ff4c7c..c17ec8c3593e983d19943e105a4ac2662a0c4e75 100644 (file)
--- a/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -467,6 +467,24 @@ define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1)
  declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
  
  
+define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>* %a1) {
+; SSE41-LABEL: test_x86_sse41_round_sd_load:
+; SSE41:       ## BB#0:
+; SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SSE41-NEXT:    roundsd $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0x00,0x07]
+; SSE41-NEXT:    retl ## encoding: [0xc3]
+;
+; VCHECK-LABEL: test_x86_sse41_round_sd_load:
+; VCHECK:       ## BB#0:
+; VCHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; VCHECK-NEXT:    vroundsd $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
+; VCHECK-NEXT:    retl ## encoding: [0xc3]
+  %a1b = load <2 x double>, <2 x double>* %a1
+  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+
+
  define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
  ; SSE41-LABEL: test_x86_sse41_round_ss:
  ; SSE41:       ## BB#0:
diff --git a/test/CodeGen/X86/sse_partial_update.ll b/test/CodeGen/X86/sse_partial_update.ll

index bd207c99dbdbca9b6082a8c990f4512a8d331d9d..8dfb8ee70076c166c78588e04577997c49e04f8e 100644 (file)
--- a/test/CodeGen/X86/sse_partial_update.ll
+++ b/test/CodeGen/X86/sse_partial_update.ll
@@ -98,9 +98,8 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
  define <2 x double> @load_fold_cvtss2sd_int(<4 x float> *%a) {
  ; CHECK-LABEL: load_fold_cvtss2sd_int:
  ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movaps (%rdi), %xmm1
  ; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    cvtss2sd %xmm1, %xmm0
+; CHECK-NEXT:    cvtss2sd (%rdi), %xmm0
  ; CHECK-NEXT:    retq
    %ld = load <4 x float>, <4 x float> *%a
    %x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %ld)
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll

index 591f6e6ced1af1b3c355a3bc87ec9672144a9144..edb5940fa305acd758325998730b9cb5cfebac31 100644 (file)
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -200,8 +200,7 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
  ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
  ; X32-NEXT:    movaps %xmm0, (%esp) ## 16-byte Spill
  ; X32-NEXT:    calll _f
-; X32-NEXT:    movaps (%esp), %xmm1 ## 16-byte Reload
-; X32-NEXT:    roundss $4, %xmm1, %xmm0
+; X32-NEXT:    roundss $4, (%esp), %xmm0 ## 16-byte Folded Reload
  ; X32-NEXT:    addl $28, %esp
  ; X32-NEXT:    retl
  ;
@@ -211,8 +210,7 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
  ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
  ; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
  ; X64-NEXT:    callq _f
-; X64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
-; X64-NEXT:    roundss $4, %xmm1, %xmm0
+; X64-NEXT:    roundss $4, (%rsp), %xmm0 ## 16-byte Folded Reload
  ; X64-NEXT:    addq $24, %rsp
  ; X64-NEXT:    retq
  ;
author	Craig Topper <craig.topper@gmail.com>
	Mon, 12 Dec 2016 05:07:17 +0000 (05:07 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Mon, 12 Dec 2016 05:07:17 +0000 (05:07 +0000)
lib/Target/X86/X86InstrInfo.cpp		patch | blob | history
test/CodeGen/X86/sse2-intrinsics-fast-isel.ll		patch | blob | history
test/CodeGen/X86/sse2-intrinsics-x86.ll		patch | blob | history
test/CodeGen/X86/sse41-intrinsics-x86.ll		patch | blob | history
test/CodeGen/X86/sse_partial_update.ll		patch | blob | history
test/CodeGen/X86/vec_ss_load_fold.ll		patch | blob | history