From f59d699723bc83e65b89719aef6d5bbd4ce1cef0 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 31 May 2019 06:21:53 +0000
Subject: [PATCH] [X86] Remove avx512 isel patterns for fpextend+load. Prefer
 to only match fp extloads instead.

DAG combine will usually fold fpextend+load to an fp extload anyway, so the
256-bit and 512-bit patterns were probably unnecessary. The 128-bit pattern
was special in that it looked for a v4f32 load, but then used it in an
instruction that only loads 64 bits. This is bad if the load happens to be
volatile.

We could probably make the patterns volatile-aware, but that's more work for
something that's probably rare. The peephole pass might kick in and save us
anyway. We might also be able to fix this with some additional DAG combines.

This also adds patterns for vselect+extload to enable masked vcvtps2pd to be
used. Previously we looked for the unlikely vselect+fpextend+load.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362199 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrAVX512.td | 68 ++++++++++++++++++++++++++------
 test/CodeGen/X86/avx512-cvt.ll   |  3 +-
 test/CodeGen/X86/vec_fpext.ll    |  7 ++--
 3 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index d93059b44f3..f6e4e851192 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -7677,19 +7677,73 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 
+// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
+multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                X86VectorVTInfo _Src, SDNode OpNode,
+                                X86FoldableSchedWrite sched,
+                                string Broadcast = _.BroadcastStr,
+                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
+                                RegisterClass MaskRC = _.KRCWM,
+                                PatFrag LdFrag = !cast<PatFrag>("extload"#_Src.VTName)> {
+
+  defm rr : AVX512_maskable_common,
+            EVEX, Sched<[sched]>;
+
+  defm rm : AVX512_maskable_common,
+            EVEX, Sched<[sched.Folded]>;
+
+  defm rmb : AVX512_maskable_common,
+            EVEX, EVEX_B, Sched<[sched.Folded]>;
+}
+
 // Extend Float to Double
 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
   let Predicates = [HasAVX512] in {
-    defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512;
   }
   let Predicates = [HasVLX] in {
-    defm Z128 : avx512_vcvt_fp, EVEX_V128;
-    defm Z256 : avx512_vcvt_fp, EVEX_V256;
   }
 }
@@ -7784,9 +7838,6 @@ let Predicates = [HasAVX512] in {
                    (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
                    v8f32x_info.ImmAllZerosV),
             (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
-
-  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
-            (VCVTPS2PDZrm addr:$src)>;
 }
 
 let Predicates = [HasVLX] in {
@@ -7819,11 +7870,6 @@ let Predicates = [HasVLX] in {
                    v4f32x_info.ImmAllZerosV),
             (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
 
-  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
-            (VCVTPS2PDZ128rm addr:$src)>;
-  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
-            (VCVTPS2PDZ256rm addr:$src)>;
-
   // Special patterns to allow use of X86vmfpround for masking. Instruction
   // patterns have been disabled with null_frag.
   def : Pat<(X86vfpround (v2f64 VR128X:$src)),
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index aa7f533c1d4..48293c08a5c 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -791,9 +791,8 @@ define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x
 ;
 ; VL-LABEL: f32to4f64_mask_load:
 ; VL:       # %bb.0:
-; VL-NEXT:    vcvtps2pd (%rdi), %ymm2
 ; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
-; VL-NEXT:    vmovapd %ymm2, %ymm0 {%k1} {z}
+; VL-NEXT:    vcvtps2pd (%rdi), %ymm0 {%k1} {z}
 ; VL-NEXT:    retq
   %b = load <4 x float>, <4 x float>* %p
   %a = fpext <4 x float> %b to <4 x double>
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
index 082a5336657..46ad1f16f3c 100644
--- a/test/CodeGen/X86/vec_fpext.ll
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -300,7 +300,6 @@ entry:
 }
 
 ; Make sure we don't narrow a volatile load.
-; FIXME: We incorrectly narrow it for avx512vl.
 define <2 x double> @PR42079(<4 x float>* %x) {
 ; X32-SSE-LABEL: PR42079:
 ; X32-SSE:       # %bb.0:
@@ -319,7 +318,8 @@ define <2 x double> @PR42079(<4 x float>* %x) {
 ; X32-AVX512VL-LABEL: PR42079:
 ; X32-AVX512VL:       # %bb.0:
 ; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT:    vcvtps2pd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x00]
+; X32-AVX512VL-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
+; X32-AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
 ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: PR42079:
 ; X64-SSE:       # %bb.0:
@@ -336,7 +336,8 @@ define <2 x double> @PR42079(<4 x float>* %x) {
 ;
 ; X64-AVX512VL-LABEL: PR42079:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    vcvtps2pd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x07]
+; X64-AVX512VL-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
+; X64-AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %a = load volatile <4 x float>, <4 x float>* %x
   %b = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32>
-- 
2.40.0
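Illustrative note (not part of the patch): a minimal LLVM IR sketch, with assumed
function names, of the two shapes the commit message describes. DAG combine folds
the fpext of a loaded vector into an fp extload, which the remaining patterns
select as vcvtps2pd with a memory operand; with the new vselect+extload patterns,
a zero-masked select over the extended value can become a masked vcvtps2pd, as in
the f32to4f64_mask_load test above.

; Hypothetical examples, not tests from this patch.

define <8 x double> @ext_load(<8 x float>* %p) {
  ; load + fpext; DAG combine turns this into an fp extload before isel.
  %v = load <8 x float>, <8 x float>* %p
  %e = fpext <8 x float> %v to <8 x double>
  ret <8 x double> %e
}

define <8 x double> @ext_load_maskz(<8 x float>* %p, <8 x double> %a, <8 x double> %b) {
  ; select of the extended value against zero, mirroring the masked test above;
  ; the vselect+extload patterns should allow a zero-masked vcvtps2pd with a
  ; folded load here.
  %v = load <8 x float>, <8 x float>* %p
  %e = fpext <8 x float> %v to <8 x double>
  %m = fcmp ogt <8 x double> %a, %b
  %r = select <8 x i1> %m, <8 x double> %e, <8 x double> zeroinitializer
  ret <8 x double> %r
}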