From: Craig Topper
Date: Sat, 14 Oct 2017 07:04:48 +0000 (+0000)
Subject: [X86] Add patterns for vzmovl+cvtpd2dq/cvttpd2dq with a load.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1d2af6aa6e7874ac03597067833c23eb9ba731a4;p=llvm

[X86] Add patterns for vzmovl+cvtpd2dq/cvttpd2dq with a load.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315802 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index f68516a34ce..cdc364970ab 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -7031,12 +7031,18 @@ let Predicates = [HasAVX512, HasVLX] in {
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
             (VCVTPD2DQZ128rr VR128X:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
+            (VCVTPD2DQZ128rm addr:$src)>;
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
             (VCVTPD2UDQZ128rr VR128X:$src)>;
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
             (VCVTTPD2DQZ128rr VR128X:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
+            (VCVTTPD2DQZ128rm addr:$src)>;
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
             (VCVTTPD2UDQZ128rr VR128X:$src)>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 556c64c4ea0..279359674af 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1889,9 +1889,15 @@ let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
             (VCVTPD2DQrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
+            (VCVTPD2DQrm addr:$src)>;
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
             (VCVTTPD2DQrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
+            (VCVTTPD2DQrm addr:$src)>;
 }
 } // Predicates = [HasAVX]
@@ -1911,9 +1917,15 @@ let Predicates = [UseSSE2] in {
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
             (CVTPD2DQrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
+            (CVTPD2DQrm addr:$src)>;
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
             (CVTTPD2DQrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
+            (CVTTPD2DQrm addr:$src)>;
 }
 } // Predicates = [UseSSE2]
@@ -2071,7 +2083,7 @@ let Predicates = [UseSSE2] in {
                                 (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
             (CVTPD2PSrr VR128:$src)>;
   def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
+                               (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
             (CVTPD2PSrm addr:$src)>;
 }
 }
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index a1fec401a4a..d4047faad9b 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -273,6 +273,32 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind {
 }
 
 
+define <2 x i64> @test_mm_cvtpd_epi32_zext_load(<2 x double>* %p0) nounwind {
+; SSE-LABEL: test_mm_cvtpd_epi32_zext_load:
+; SSE:       ## BB#0:
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SSE-NEXT:    cvtpd2dq (%eax), %xmm0 ## encoding: [0xf2,0x0f,0xe6,0x00]
+; SSE-NEXT:    retl ## encoding: [0xc3]
+;
+; AVX2-LABEL: test_mm_cvtpd_epi32_zext_load:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; AVX2-NEXT:    vcvtpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xfb,0xe6,0x00]
+; AVX2-NEXT:    retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_mm_cvtpd_epi32_zext_load:
+; SKX:       ## BB#0:
+; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SKX-NEXT:    vcvtpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0x00]
+; SKX-NEXT:    retl ## encoding: [0xc3]
+  %a0 = load <2 x double>, <2 x double>* %p0
+  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
+  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %bc = bitcast <4 x i32> %res to <2 x i64>
+  ret <2 x i64> %bc
+}
+
+
 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
 ; SSE-LABEL: test_x86_sse2_cvtpd2ps:
 ; SSE:       ## BB#0:
@@ -542,6 +568,32 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind {
 }
 
 
+define <2 x i64> @test_mm_cvttpd_epi32_zext_load(<2 x double>* %p0) nounwind {
+; SSE-LABEL: test_mm_cvttpd_epi32_zext_load:
+; SSE:       ## BB#0:
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SSE-NEXT:    cvttpd2dq (%eax), %xmm0 ## encoding: [0x66,0x0f,0xe6,0x00]
+; SSE-NEXT:    retl ## encoding: [0xc3]
+;
+; AVX2-LABEL: test_mm_cvttpd_epi32_zext_load:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; AVX2-NEXT:    vcvttpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xf9,0xe6,0x00]
+; AVX2-NEXT:    retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_mm_cvttpd_epi32_zext_load:
+; SKX:       ## BB#0:
+; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SKX-NEXT:    vcvttpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0x00]
+; SKX-NEXT:    retl ## encoding: [0xc3]
+  %a0 = load <2 x double>, <2 x double>* %p0
+  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
+  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %bc = bitcast <4 x i32> %res to <2 x i64>
+  ret <2 x i64> %bc
+}
+
+
 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
 ; SSE-LABEL: test_x86_sse2_cvttps2dq:
 ; SSE:       ## BB#0:
@@ -710,21 +762,21 @@ define <8 x i16> @test_x86_sse2_packssdw_128_fold() {
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
 ; SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI33_0, kind: FK_Data_4
+; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI35_0, kind: FK_Data_4
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packssdw_128_fold:
 ; AVX2:       ## BB#0:
 ; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
 ; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4
+; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packssdw_128_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vmovaps LCPI33_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
+; SKX-NEXT:    vmovaps LCPI35_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
 ; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4
+; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
   ret <8 x i16> %res
@@ -757,21 +809,21 @@ define <16 x i8> @test_x86_sse2_packsswb_128_fold() {
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
 ; SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI35_0, kind: FK_Data_4
+; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI37_0, kind: FK_Data_4
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packsswb_128_fold:
 ; AVX2:       ## BB#0:
 ; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
 ; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
+; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packsswb_128_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vmovaps LCPI35_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
+; SKX-NEXT:    vmovaps LCPI37_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
 ; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
+; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
   ret <16 x i8> %res
@@ -804,21 +856,21 @@ define <16 x i8> @test_x86_sse2_packuswb_128_fold() {
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI37_0, kind: FK_Data_4
+; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI39_0, kind: FK_Data_4
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packuswb_128_fold:
 ; AVX2:       ## BB#0:
 ; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4
+; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI39_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packuswb_128_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vmovaps LCPI37_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; SKX-NEXT:    vmovaps LCPI39_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4
+; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI39_0, kind: FK_Data_4
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
   ret <16 x i8> %res
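
For context, the IR shape these patterns match is a cvtpd2dq/cvttpd2dq
intrinsic whose <4 x i32> result is zero-extended into a <2 x i64> by a
shufflevector with zeroinitializer, which the backend sees as the X86vzmovl
node in the patterns. Previously, when the conversion's operand came from
memory, the load stayed separate; with these patterns it folds into the
memory form of the instruction, as the new _zext_load tests check. A
minimal C sketch of source that can produce this shape (the helper names
are illustrative and not part of this commit, and the exact IR depends on
how the intrinsics are lowered and optimized):

    #include <emmintrin.h>

    /* Rounding conversion from memory: with the new patterns the aligned
       load should fold into cvtpd2dq/vcvtpd2dqx instead of being emitted
       as a separate movaps plus the register form. */
    __m128i cvt_load(const double *p) {
      __m128d v = _mm_load_pd(p);   /* 16-byte aligned load */
      return _mm_cvtpd_epi32(v);    /* upper 64 bits of the result are zero */
    }

    /* Truncating conversion from memory: same idea, now folding into
       cvttpd2dq/vcvttpd2dqx. */
    __m128i cvtt_load(const double *p) {
      __m128d v = _mm_load_pd(p);
      return _mm_cvttpd_epi32(v);
    }

Note the asymmetry in the pattern fragments above: the SSE patterns use
memopv2f64, which only matches loads known to be 16-byte aligned (or
targets that tolerate unaligned SSE memory operands), while the AVX and
AVX-512 patterns use loadv2f64, since VEX/EVEX-encoded instructions accept
unaligned memory operands.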