From 65c20a7766b3f455591982e7421c10b06771227c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 14 Oct 2017 05:55:42 +0000 Subject: [PATCH] [X86] Add patterns for vzmovl+cvtpd2ps with a load. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315800 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 12 ++++--- lib/Target/X86/X86InstrSSE.td | 24 ++++++++----- test/CodeGen/X86/sse2-intrinsics-x86.ll | 48 ++++++++++++++++++------- 3 files changed, 60 insertions(+), 24 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 9af9ad85929..f68516a34ce 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6651,10 +6651,14 @@ def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; let Predicates = [HasVLX] in { - let AddedComplexity = 15 in - def : Pat<(X86vzmovl (v2f64 (bitconvert - (v4f32 (X86vfpround (v2f64 VR128X:$src)))))), - (VCVTPD2PSZ128rr VR128X:$src)>; + let AddedComplexity = 15 in { + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (v2f64 VR128X:$src)))))), + (VCVTPD2PSZ128rr VR128X:$src)>; + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (loadv2f64 addr:$src)))))), + (VCVTPD2PSZ128rm addr:$src)>; + } def : Pat<(v2f64 (extloadv2f32 addr:$src)), (VCVTPS2PDZ128rm addr:$src)>; def : Pat<(v4f64 (extloadv4f32 addr:$src)), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 13a418a623b..556c64c4ea0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2054,18 +2054,26 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), let Predicates = [HasAVX, NoVLX] in { // Match fpround and fpextend for 128/256-bit conversions - let AddedComplexity = 15 in - def : Pat<(X86vzmovl (v2f64 (bitconvert - (v4f32 (X86vfpround (v2f64 VR128:$src)))))), - (VCVTPD2PSrr VR128:$src)>; + let AddedComplexity = 15 in { + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (v2f64 VR128:$src)))))), + (VCVTPD2PSrr VR128:$src)>; + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (loadv2f64 addr:$src)))))), + (VCVTPD2PSrm addr:$src)>; + } } let Predicates = [UseSSE2] in { // Match fpround and fpextend for 128 conversions - let AddedComplexity = 15 in - def : Pat<(X86vzmovl (v2f64 (bitconvert - (v4f32 (X86vfpround (v2f64 VR128:$src)))))), - (CVTPD2PSrr VR128:$src)>; + let AddedComplexity = 15 in { + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (v2f64 VR128:$src)))))), + (CVTPD2PSrr VR128:$src)>; + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (loadv2f64 addr:$src)))))), + (CVTPD2PSrm addr:$src)>; + } } //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll index 7324e855088..a1fec401a4a 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -313,6 +313,30 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind { ret <4 x float> %res } +define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(<2 x double>* %p0) nounwind { +; SSE-LABEL: test_x86_sse2_cvtpd2ps_zext_load: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; SSE-NEXT: cvtpd2ps (%eax), %xmm0 ## encoding: [0x66,0x0f,0x5a,0x00] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse2_cvtpd2ps_zext_load: +; AVX2: ## BB#0: +; AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; AVX2-NEXT: vcvtpd2psx (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x5a,0x00] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_cvtpd2ps_zext_load: +; SKX: ## BB#0: +; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; SKX-NEXT: vcvtpd2psx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0x00] +; SKX-NEXT: retl ## encoding: [0xc3] + %a0 = load <2 x double>, <2 x double>* %p0 + %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) + %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> + ret <4 x float> %res +} + define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) { ; SSE-LABEL: test_x86_sse2_cvtps2dq: ; SSE: ## BB#0: @@ -686,21 +710,21 @@ define <8 x i16> @test_x86_sse2_packssdw_128_fold() { ; SSE: ## BB#0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768] ; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A] -; SSE-NEXT: ## fixup A - offset: 3, value: LCPI32_0, kind: FK_Data_4 +; SSE-NEXT: ## fixup A - offset: 3, value: LCPI33_0, kind: FK_Data_4 ; SSE-NEXT: retl ## encoding: [0xc3] ; ; AVX2-LABEL: test_x86_sse2_packssdw_128_fold: ; AVX2: ## BB#0: ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768] ; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; SKX-LABEL: test_x86_sse2_packssdw_128_fold: ; SKX: ## BB#0: -; SKX-NEXT: vmovaps LCPI32_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768] +; SKX-NEXT: vmovaps LCPI33_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768] ; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; SKX-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4 +; SKX-NEXT: ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4 ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> ) ret <8 x i16> %res @@ -733,21 +757,21 @@ define <16 x i8> @test_x86_sse2_packsswb_128_fold() { ; SSE: ## BB#0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0] ; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A] -; SSE-NEXT: ## fixup A - offset: 3, value: LCPI34_0, kind: FK_Data_4 +; SSE-NEXT: ## fixup A - offset: 3, value: LCPI35_0, kind: FK_Data_4 ; SSE-NEXT: retl ## encoding: [0xc3] ; ; AVX2-LABEL: test_x86_sse2_packsswb_128_fold: ; AVX2: ## BB#0: ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0] ; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; SKX-LABEL: test_x86_sse2_packsswb_128_fold: ; SKX: ## BB#0: -; SKX-NEXT: vmovaps LCPI34_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0] +; SKX-NEXT: vmovaps LCPI35_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0] ; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; SKX-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4 +; SKX-NEXT: ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4 ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> , <8 x i16> zeroinitializer) ret <16 x i8> %res @@ -780,21 +804,21 @@ define <16 x i8> @test_x86_sse2_packuswb_128_fold() { ; SSE: ## BB#0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0] ; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A] -; SSE-NEXT: ## fixup A - offset: 3, value: LCPI36_0, kind: FK_Data_4 +; SSE-NEXT: ## fixup A - offset: 3, value: LCPI37_0, kind: FK_Data_4 ; SSE-NEXT: retl ## encoding: [0xc3] ; ; AVX2-LABEL: test_x86_sse2_packuswb_128_fold: ; AVX2: ## BB#0: ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; SKX-LABEL: test_x86_sse2_packuswb_128_fold: ; SKX: ## BB#0: -; SKX-NEXT: vmovaps LCPI36_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0] +; SKX-NEXT: vmovaps LCPI37_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0] ; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; SKX-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4 +; SKX-NEXT: ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4 ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> , <8 x i16> zeroinitializer) ret <16 x i8> %res -- 2.40.0