From d5e571b1582b59d0bf7726ec69a72590849c3916 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 31 May 2019 07:38:26 +0000
Subject: [PATCH] [X86] Remove patterns for
 X86VSintToFP/X86VUintToFP+loadv4f32 to v2f64.

These patterns can incorrectly narrow a volatile load from 128 bits to
64 bits. Similar to PR42079.

Switch to (v4i32 (bitcast (v2i64 (scalar_to_vector (loadi64))))) as the
load pattern in the instruction definitions instead.

This probably still has issues in 32-bit mode, where loadi64 isn't
legal. Maybe we should use VZMOVL for widened loads even when we don't
need the upper bits as zeroes?
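For illustration, this is the shape of IR that could be miscompiled. It
is reconstructed from the sitofp_volatile_load_4i32_to_2f64 test
updated below; the trailing sitofp/ret lines are inferred from the
test's CHECK lines:

  define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
    ; The volatile load must stay 128 bits wide, but the removed
    ; patterns let isel fold it into the 64-bit memory operand of
    ; (v)cvtdq2pd, shrinking the volatile access.
    %ld = load volatile <4 x i32>, <4 x i32> *%a
    %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    %cvt = sitofp <2 x i32> %b to <2 x double>
    ret <2 x double> %cvt
  }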
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362203 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrAVX512.td        | 63 +++++--------------------
 lib/Target/X86/X86InstrSSE.td           | 14 +++---
 test/CodeGen/X86/avx512-cvt-widen.ll    | 12 ++---
 test/CodeGen/X86/avx512-cvt.ll          | 12 ++---
 test/CodeGen/X86/vec_int_to_fp-widen.ll |  6 ++-
 test/CodeGen/X86/vec_int_to_fp.ll       |  6 ++-
 6 files changed, 36 insertions(+), 77 deletions(-)

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index f6e4e851192..753f1b71b07 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -7608,7 +7608,8 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
-                         RegisterClass MaskRC = _.KRCWM> {
+                         RegisterClass MaskRC = _.KRCWM,
+                         dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
 
   defm rr : AVX512_maskable_common<opc, OpcodeStr, _,
                          (ins _Src.RC:$src),
@@ -7621,8 +7622,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                          (ins MaskRC:$mask, MemOp:$src),
                          OpcodeStr#Alias, "$src", "$src",
-                         (_.VT (OpNode (_Src.VT
-                                        (_Src.LdFrag addr:$src)))),
+                         LdDAG,
                          (vselect MaskRC:$mask,
                                   (_.VT (OpNode (_Src.VT
                                                  (_Src.LdFrag addr:$src)))),
@@ -7683,53 +7683,10 @@ multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                 X86FoldableSchedWrite sched,
                                 string Broadcast = _.BroadcastStr,
                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
-                                RegisterClass MaskRC = _.KRCWM,
-                                PatFrag LdFrag = !cast<PatFrag>("extload"#_Src.VTName)> {
-
-  defm rr : AVX512_maskable_common<opc, OpcodeStr, _,
-                         (ins _Src.RC:$src),
-                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
-                         (ins MaskRC:$mask, _Src.RC:$src),
-                         OpcodeStr, "$src", "$src",
-                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
-                         (vselect MaskRC:$mask,
-                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
-                                  _.RC:$src0),
-                         vselect, "$src0 = $dst">,
-                         EVEX, Sched<[sched]>;
-
-  defm rm : AVX512_maskable_common<opc, OpcodeStr, _,
-                         (ins MemOp:$src),
-                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
-                         (ins MaskRC:$mask, MemOp:$src),
-                         OpcodeStr#Alias, "$src", "$src",
-                         (_.VT (LdFrag addr:$src)),
-                         (vselect MaskRC:$mask,
-                                  (_.VT (LdFrag addr:$src)),
-                                  _.RC:$src0),
-                         vselect, "$src0 = $dst">,
-                         EVEX, Sched<[sched.Folded]>;
-
-  defm rmb : AVX512_maskable_common<opc, OpcodeStr, _,
-                         (ins _Src.ScalarMemOp:$src),
-                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
-                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
-                         OpcodeStr,
-                         "${src}"#Broadcast, "${src}"#Broadcast,
-                         (_.VT (OpNode (_Src.VT
-                                        (X86VBroadcast
-                                         (_Src.ScalarLdFrag addr:$src))))),
-                         (vselect MaskRC:$mask,
-                                  (_.VT (OpNode (_Src.VT
-                                                 (X86VBroadcast
-                                                  (_Src.ScalarLdFrag addr:$src))))),
-                                  _.RC:$src0),
-                         vselect, "$src0 = $dst">,
-                         EVEX, EVEX_B, Sched<[sched.Folded]>;
-}
+                                RegisterClass MaskRC = _.KRCWM>
+  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
+                   MemOp, MaskRC,
+                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
 
 // Extend Float to Double
 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
@@ -7910,7 +7867,11 @@ multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
   let Predicates = [HasVLX] in {
     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
-                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM>,
-                               EVEX_V128;
+                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
+                               (v2f64 (OpNode128 (bc_v4i32
+                                (v2i64
+                                 (scalar_to_vector (loadi64 addr:$src))))))>,
+                               EVEX_V128;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                                sched.YMM>, EVEX_V256;
   }
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 18d9af8bdcd..23aea3ea908 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1655,7 +1655,10 @@ let hasSideEffects = 0, mayLoad = 1 in
 def VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
-                          (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+                          (v2f64 (X86VSintToFP
+                                  (bc_v4i32
+                                   (v2i64 (scalar_to_vector
+                                           (loadi64 addr:$src)))))))]>,
                         VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
 def VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1679,7 +1682,10 @@ let hasSideEffects = 0, mayLoad = 1 in
 def CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+                         (v2f64 (X86VSintToFP
+                                 (bc_v4i32
+                                  (v2i64 (scalar_to_vector
+                                          (loadi64 addr:$src)))))))]>,
                        Sched<[WriteCvtI2PDLd]>;
 def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1689,16 +1695,12 @@ def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
 
 // AVX register conversion intrinsics
 let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (VCVTDQ2PDrm addr:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
             (VCVTDQ2PDrm addr:$src)>;
 } // Predicates = [HasAVX, NoVLX]
 
 // SSE2 register conversion intrinsics
 let Predicates = [UseSSE2] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (CVTDQ2PDrm addr:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
             (CVTDQ2PDrm addr:$src)>;
 } // Predicates = [UseSSE2]
diff --git a/test/CodeGen/X86/avx512-cvt-widen.ll b/test/CodeGen/X86/avx512-cvt-widen.ll
index 5b991847d68..90631baaf47 100644
--- a/test/CodeGen/X86/avx512-cvt-widen.ll
+++ b/test/CodeGen/X86/avx512-cvt-widen.ll
@@ -2558,16 +2558,14 @@ define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_sito2f64_mask_load:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
-; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT:    retq
 ;
 ; VLNODQ-LABEL: test_sito2f64_mask_load:
 ; VLNODQ:       # %bb.0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    retq
 ;
 ; DQNOVL-LABEL: test_sito2f64_mask_load:
@@ -2611,16 +2609,14 @@ define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_uito2f64_mask_load:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
-; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT:    retq
 ;
 ; VLNODQ-LABEL: test_uito2f64_mask_load:
 ; VLNODQ:       # %bb.0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    retq
 ;
 ; DQNOVL-LABEL: test_uito2f64_mask_load:
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 2d1202a9200..2b5112650a9 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -2533,16 +2533,14 @@ define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_sito2f64_mask_load:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
-; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT:    retq
 ;
 ; VLNODQ-LABEL: test_sito2f64_mask_load:
 ; VLNODQ:       # %bb.0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    retq
 ;
 ; DQNOVL-LABEL: test_sito2f64_mask_load:
@@ -2586,16 +2584,14 @@ define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_uito2f64_mask_load:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
-; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT:    retq
 ;
 ; VLNODQ-LABEL: test_uito2f64_mask_load:
 ; VLNODQ:       # %bb.0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    retq
 ;
 ; DQNOVL-LABEL: test_uito2f64_mask_load:
diff --git a/test/CodeGen/X86/vec_int_to_fp-widen.ll b/test/CodeGen/X86/vec_int_to_fp-widen.ll
index eb85beb5398..78bc214358b 100644
--- a/test/CodeGen/X86/vec_int_to_fp-widen.ll
+++ b/test/CodeGen/X86/vec_int_to_fp-widen.ll
@@ -3158,12 +3158,14 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
 define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
 ; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    movaps (%rdi), %xmm0
+; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    vmovaps (%rdi), %xmm0
+; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %ld = load volatile <4 x i32>, <4 x i32> *%a
   %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index d394383ddc2..9b543075f3b 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -3158,12 +3158,14 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
 define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
 ; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    movaps (%rdi), %xmm0
+; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    vmovaps (%rdi), %xmm0
+; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %ld = load volatile <4 x i32>, <4 x i32> *%a
   %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-- 
2.50.1