From d1ecc080dcc117f45d5c40fe1625f42517674fbc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Feb 2017 20:43:32 +0000 Subject: [PATCH] [X86][SSE] Add (V)MOVD folding pattern with zextloadi64i32 load node. Fixes PR31309 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295492 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 2 ++ lib/Target/X86/X86InstrSSE.td | 4 ++++ test/CodeGen/X86/merge-consecutive-loads-128.ll | 8 +++----- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index f94ba848b3d..ca79f320611 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3552,6 +3552,8 @@ let Predicates = [HasAVX512] in { } // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. let AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), + (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d2dfb1b5d7d..ec17ae083ad 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4822,6 +4822,8 @@ let Predicates = [UseAVX] in { // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part. // These instructions also write zeros in the high part of a 256-bit register. 
let AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), + (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), @@ -4851,6 +4853,8 @@ let Predicates = [UseSSE2] in { (MOV64toPQIrr GR64:$src)>; } let AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), + (MOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (MOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll index 82df614a9c5..e498eb78337 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -1137,18 +1137,16 @@ define <4 x float> @merge_4f32_f32_X0YY(float* %ptr0, float* %ptr1) nounwind uwt ; Extension tests. ; -; FIXME: PR31309 +; PR31309 define <4 x i32> @load_i32_zext_i128_v4i32(i32* %ptr) { ; SSE-LABEL: load_i32_zext_i128_v4i32: ; SSE: # BB#0: -; SSE-NEXT: movl (%rdi), %eax -; SSE-NEXT: movd %rax, %xmm0 +; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: retq ; ; AVX-LABEL: load_i32_zext_i128_v4i32: ; AVX: # BB#0: -; AVX-NEXT: movl (%rdi), %eax -; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: retq ; ; X32-SSE1-LABEL: load_i32_zext_i128_v4i32: -- 2.50.1