From: Simon Pilgrim Date: Tue, 19 Feb 2019 16:33:17 +0000 (+0000) Subject: [X86][AVX] Update VBROADCAST folds to always use v2i64 X86vzload X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f8fa4080b9d6e3e9c2ba525a86afbf879e98c76c;p=llvm [X86][AVX] Update VBROADCAST folds to always use v2i64 X86vzload The VBROADCAST combines and SimplifyDemandedVectorElts improvements mean that we now more consistently use shorter (128-bit) X86vzload input operands. Follow up to D58053 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354346 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 0fba8cb4d09..c82d1703a0d 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1378,7 +1378,7 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX512] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. - def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))), + def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), (VPBROADCASTQZm addr:$src)>; } @@ -1386,7 +1386,7 @@ let Predicates = [HasVLX] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), (VPBROADCASTQZ128m addr:$src)>; - def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))), + def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), (VPBROADCASTQZ256m addr:$src)>; } let Predicates = [HasVLX, HasBWI] in { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index bc2622446bc..a55b1489e9e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7850,7 +7850,7 @@ let Predicates = [HasAVX2, NoVLX] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. 
def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), (VPBROADCASTQrm addr:$src)>; - def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))), + def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), (VPBROADCASTQYrm addr:$src)>; def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), diff --git a/test/CodeGen/X86/insertelement-shuffle.ll b/test/CodeGen/X86/insertelement-shuffle.ll index 8880dda4984..5b44337785e 100644 --- a/test/CodeGen/X86/insertelement-shuffle.ll +++ b/test/CodeGen/X86/insertelement-shuffle.ll @@ -95,8 +95,7 @@ define <8 x i64> @insert_subvector_into_undef(i32 %x0, i32 %x1) nounwind { ; ; X86_AVX512-LABEL: insert_subvector_into_undef: ; X86_AVX512: # %bb.0: -; X86_AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86_AVX512-NEXT: vbroadcastsd %xmm0, %zmm0 +; X86_AVX512-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %zmm0 ; X86_AVX512-NEXT: retl ; ; X64_AVX512-LABEL: insert_subvector_into_undef: diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index 963fb98f56a..bedf6823277 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -655,8 +655,7 @@ define <16 x i8> @combine_pshufb_insertion_as_broadcast_v2i64(i64 %a0) { define <8 x i32> @combine_permd_insertion_as_broadcast_v4i64(i64 %a0) { ; X86-LABEL: combine_permd_insertion_as_broadcast_v4i64: ; X86: # %bb.0: -; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-NEXT: vbroadcastsd %xmm0, %ymm0 +; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0 ; X86-NEXT: retl ; ; X64-LABEL: combine_permd_insertion_as_broadcast_v4i64: diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll index 6a295ba8cc5..b8efb26d6fc 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -975,8 +975,7 @@ define 
<16 x float> @combine_vpermi2var_vpermvar_16f32_as_vperm2_zero(<16 x floa define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) { ; X86-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64: ; X86: # %bb.0: -; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-NEXT: vbroadcastsd %xmm0, %zmm0 +; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %zmm0 ; X86-NEXT: retl ; ; X64-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64: