From: Craig Topper Date: Sat, 1 Oct 2016 06:01:23 +0000 (+0000) Subject: [AVX-512] Add EVEX versions of VPBROADCASTW patterns with truncated i32 loads. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=825f7d897c1bb0b4625f9319fcaccfe5f301fce4;p=llvm [AVX-512] Add EVEX versions of VPBROADCASTW patterns with truncated i32 loads. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283015 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index fc28e5df947..950fbc5e402 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1009,6 +1009,21 @@ multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, AVX5128IBase, EVEX; } +let Predicates = [HasVLX, HasBWI] in { + // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. + // This means we'll encounter truncated i32 loads; match that here. + def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), + (VPBROADCASTWZ128m addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), + (VPBROADCASTWZ256m addr:$src)>; + def : Pat<(v8i16 (X86VBroadcast + (i16 (trunc (i32 (zextloadi16 addr:$src)))))), + (VPBROADCASTWZ128m addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast + (i16 (trunc (i32 (zextloadi16 addr:$src)))))), + (VPBROADCASTWZ256m addr:$src)>; +} + //===----------------------------------------------------------------------===// // AVX-512 BROADCAST SUBVECTORS // diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0f3232bb1ed..98d5dac9e8e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -8318,7 +8318,7 @@ defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, v2i64, v4i64, NoVLX>; -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. // This means we'll encounter truncated i32 loads; match that here. def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), @@ -8331,7 +8331,9 @@ let Predicates = [HasAVX2] in { def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWYrm addr:$src)>; +} +let Predicates = [HasAVX2] in { // Provide aliases for broadcast from the same register class that // automatically does the extract. def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),