From 5b42c82e6a8161a5bdcd6773fbb13887ca00f268 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 10 Mar 2017 22:35:07 +0000 Subject: [PATCH] [X86][SSE] Fix load folding for (V)CVTDQ2PD This only requires a 64-bit memory source, not the whole 128-bits. But the 128-bit case is still supported via X86InstrInfo::foldMemoryOperandImpl git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297523 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 4 ++-- test/CodeGen/X86/known-signbits-vector.ll | 3 +-- test/CodeGen/X86/mmx-cvt.ll | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 823467d79bd..f1281280d90 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2165,7 +2165,7 @@ let hasSideEffects = 0, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>, + (v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))]>, VEX, Sched<[WriteCvtI2FLd]>, VEX_WIG; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", @@ -2188,7 +2188,7 @@ let hasSideEffects = 0, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))], + (v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))], IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>; def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll index 3a57c06651d..12dfdc025ce 100644 --- a/test/CodeGen/X86/known-signbits-vector.ll +++ b/test/CodeGen/X86/known-signbits-vector.ll @@ -5,8 +5,7 @@ define <2 x double> @signbits_sext_v2i64_sitofp_v2f64(i32 %a0, i32 %a1) nounwind { ; X32-LABEL: signbits_sext_v2i64_sitofp_v2f64: ; X32: # BB#0: -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: vcvtdq2pd %xmm0, %xmm0 +; X32-NEXT: vcvtdq2pd {{[0-9]+}}(%esp), %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: signbits_sext_v2i64_sitofp_v2f64: diff --git a/test/CodeGen/X86/mmx-cvt.ll b/test/CodeGen/X86/mmx-cvt.ll index 884c4a8dd74..71daf7b4866 100644 --- a/test/CodeGen/X86/mmx-cvt.ll +++ b/test/CodeGen/X86/mmx-cvt.ll @@ -262,8 +262,7 @@ define <2 x double> @sitofp_v2i32_v2f64(<1 x i64>*) nounwind { ; X86-NEXT: movq (%eax), %mm0 ; X86-NEXT: paddd %mm0, %mm0 ; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-NEXT: cvtdq2pd %xmm0, %xmm0 +; X86-NEXT: cvtdq2pd (%esp), %xmm0 ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl -- 2.50.1