From: Craig Topper Date: Tue, 7 Nov 2017 07:13:07 +0000 (+0000) Subject: [X86] Add patterns to fold a 64-bit load into the EVEX vcvtph2ps instructions. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c305f3d45a57fa0121c73b685e000692186b7cb0;p=llvm [X86] Add patterns to fold a 64-bit load into the EVEX vcvtph2ps instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317548 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 9ad334f94f3..4aa57f4c050 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -7196,16 +7196,25 @@ multiclass avx512_cvtph2ps_sae { } -let Predicates = [HasAVX512] in { +let Predicates = [HasAVX512] in defm VCVTPH2PSZ : avx512_cvtph2ps, avx512_cvtph2ps_sae, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; - let Predicates = [HasVLX] in { - defm VCVTPH2PSZ256 : avx512_cvtph2ps,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; - defm VCVTPH2PSZ128 : avx512_cvtph2ps, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; - } + +let Predicates = [HasVLX] in { + defm VCVTPH2PSZ256 : avx512_cvtph2ps,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPH2PSZ128 : avx512_cvtph2ps, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + + // Pattern match vcvtph2ps of a scalar i64 load. + def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), + (VCVTPH2PSZ128rm addr:$src)>; + def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))), + (VCVTPH2PSZ128rm addr:$src)>; + def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert + (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (VCVTPH2PSZ128rm addr:$src)>; } multiclass avx512_cvtps2ph @test_x86_vcvtps2ph_128_scalar(i64* %ptr) { ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar: ; X32-AVX512VL: # BB#0: ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X32-AVX512VL-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] -; X32-AVX512VL-NEXT: # xmm0 = mem[0],zero -; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0] +; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x00] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar: ; X64-AVX512VL: # BB#0: -; X64-AVX512VL-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] -; X64-AVX512VL-NEXT: # xmm0 = mem[0],zero -; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0] +; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x07] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %load = load i64, i64* %ptr %ins1 = insertelement <2 x i64> undef, i64 %load, i32 0 @@ -211,16 +207,12 @@ define <4 x float> @test_x86_vcvtps2ph_128_scalar2(i64* %ptr) { ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar2: ; X32-AVX512VL: # BB#0: ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X32-AVX512VL-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] -; X32-AVX512VL-NEXT: # xmm0 = mem[0],zero -; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0] +; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x00] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar2: ; X64-AVX512VL: # BB#0: -; X64-AVX512VL-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] -; X64-AVX512VL-NEXT: # xmm0 = mem[0],zero -; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0] +; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x07] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %load = load i64, i64* %ptr %ins = insertelement <2 x i64> undef, i64 %load, i32 0