[X86] Enable sse2_cvtsd2ss intrinsic to use an EVEX encoded instruction.

author Craig Topper <craig.topper@intel.com>
Mon, 11 Mar 2019 06:01:04 +0000 (06:01 +0000)
committer Craig Topper <craig.topper@intel.com>
Mon, 11 Mar 2019 06:01:04 +0000 (06:01 +0000)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 784aa1752075749a45fa0c92bc88c077fed4b91d..7f1cc8f3be476e28f5facdfa541c403776cefe29 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1215,28 +1215,28 @@ def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set VR128:$dst,
-                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
-                       XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
+                       XD, VEX_4V, VEX_WIG, Requires<[UseAVX]>,
                         Sched<[WriteCvtSD2SS]>;
  def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                         "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
-                                          VR128:$src1, sse_load_f64:$src2))]>,
-                       XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+                       [(set VR128:$dst,
+                         (v4f32 (X86frounds VR128:$src1, sse_load_f64:$src2)))]>,
+                       XD, VEX_4V, VEX_WIG, Requires<[UseAVX]>,
                         Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
  let Constraints = "$src1 = $dst" in {
  def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
-                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
+                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
                         XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
  def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                         "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
-                                          VR128:$src1, sse_load_f64:$src2))]>,
+                       [(set VR128:$dst,
+                         (v4f32 (X86frounds VR128:$src1,sse_load_f64:$src2)))]>,
                         XD, Requires<[UseSSE2]>,
                         Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
  }
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h

index 63b6620fae7f343d82a4ffb4fcdaf79816ed30e1..41ef56b0b6f0fb80893403f4f463401b719e7bc4 100644 (file)
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1017,6 +1017,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
    X86_INTRINSIC_DATA(sse2_cvtps2dq,     INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
    X86_INTRINSIC_DATA(sse2_cvtsd2si,     INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
    X86_INTRINSIC_DATA(sse2_cvtsd2si64,   INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+  X86_INTRINSIC_DATA(sse2_cvtsd2ss,     INTR_TYPE_2OP, X86ISD::VFPROUNDS, 0),
    X86_INTRINSIC_DATA(sse2_cvttpd2dq,    INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
    X86_INTRINSIC_DATA(sse2_cvttps2dq,    INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
    X86_INTRINSIC_DATA(sse2_cvttsd2si,    INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

index 477ce131df5c247680d87335512fc8d406be5cb9..beb895537ee91d36615555f7fddc118f62b35b0f 100644 (file)
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1553,10 +1553,15 @@ define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
  ; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1]
  ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ;
-; AVX-LABEL: test_mm_cvtsd_ss:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
-; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+; AVX1-LABEL: test_mm_cvtsd_ss:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
+; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_cvtsd_ss:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1]
+; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
    ret <4 x float> %res
  }
@@ -1569,21 +1574,32 @@ define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
  ; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00]
  ; X86-SSE-NEXT:    retl # encoding: [0xc3]
  ;
-; X86-AVX-LABEL: test_mm_cvtsd_ss_load:
-; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
-; X86-AVX-NEXT:    retl # encoding: [0xc3]
+; X86-AVX1-LABEL: test_mm_cvtsd_ss_load:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX1-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
+; X86-AVX1-NEXT:    retl # encoding: [0xc3]
+;
+; X86-AVX512-LABEL: test_mm_cvtsd_ss_load:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
+; X86-AVX512-NEXT:    retl # encoding: [0xc3]
  ;
  ; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
  ; X64-SSE:       # %bb.0:
  ; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07]
  ; X64-SSE-NEXT:    retq # encoding: [0xc3]
  ;
-; X64-AVX-LABEL: test_mm_cvtsd_ss_load:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
-; X64-AVX-NEXT:    retq # encoding: [0xc3]
+; X64-AVX1-LABEL: test_mm_cvtsd_ss_load:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
+; X64-AVX1-NEXT:    retq # encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_mm_cvtsd_ss_load:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
+; X64-AVX512-NEXT:    retq # encoding: [0xc3]
    %a1 = load <2 x double>, <2 x double>* %p1
    %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
    ret <4 x float> %res
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll

index f8a9074bc0032c9a5c35521afa9fae9cf6f778d1..44585b77ce0db6e93b8c5aea153e3d42a30e990f 100644 (file)
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -423,10 +423,15 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
  ; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
  ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  ;
-; AVX-LABEL: test_x86_sse2_cvtsd2ss:
-; AVX:       ## %bb.0:
-; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0xc1]
-; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
+; AVX1-LABEL: test_x86_sse2_cvtsd2ss:
+; AVX1:       ## %bb.0:
+; AVX1-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0xc1]
+; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
+;
+; AVX512-LABEL: test_x86_sse2_cvtsd2ss:
+; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1]
+; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
    ret <4 x float> %res
  }
@@ -440,21 +445,32 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %
  ; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
  ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
  ;
-; X86-AVX-LABEL: test_x86_sse2_cvtsd2ss_load:
-; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
-; X86-AVX-NEXT:    retl ## encoding: [0xc3]
+; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load:
+; X86-AVX1:       ## %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX1-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
+; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
+;
+; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load:
+; X86-AVX512:       ## %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
+; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
  ;
  ; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
  ; X64-SSE:       ## %bb.0:
  ; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
  ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
  ;
-; X64-AVX-LABEL: test_x86_sse2_cvtsd2ss_load:
-; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
-; X64-AVX-NEXT:    retq ## encoding: [0xc3]
+; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load:
+; X64-AVX1:       ## %bb.0:
+; X64-AVX1-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
+; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
+; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
    %a1 = load <2 x double>, <2 x double>* %p1
    %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
    ret <4 x float> %res
@@ -468,21 +484,32 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, <2 x do
  ; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
  ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
  ;
-; X86-AVX-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
-; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
-; X86-AVX-NEXT:    retl ## encoding: [0xc3]
+; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
+; X86-AVX1:       ## %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX1-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
+; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
+;
+; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
+; X86-AVX512:       ## %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
+; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
  ;
  ; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
  ; X64-SSE:       ## %bb.0:
  ; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
  ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
  ;
-; X64-AVX-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
-; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
-; X64-AVX-NEXT:    retq ## encoding: [0xc3]
+; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
+; X64-AVX1:       ## %bb.0:
+; X64-AVX1-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
+; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
+; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
    %a1 = load <2 x double>, <2 x double>* %p1
    %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
    ret <4 x float> %res
author	Craig Topper <craig.topper@intel.com>
	Mon, 11 Mar 2019 06:01:04 +0000 (06:01 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Mon, 11 Mar 2019 06:01:04 +0000 (06:01 +0000)
lib/Target/X86/X86InstrSSE.td		patch | blob | history
lib/Target/X86/X86IntrinsicsInfo.h		patch | blob | history
test/CodeGen/X86/sse2-intrinsics-fast-isel.ll		patch | blob | history
test/CodeGen/X86/sse2-intrinsics-x86.ll		patch | blob | history