From 61044921b4c5b0a01ffcf53e837600605182710f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 17 Sep 2019 04:41:10 +0000 Subject: [PATCH] [X86] Allow masked VBROADCAST instructions to be turned into BLENDM with a broadcast load to avoid a copy. The BLENDM instructions allow 2 sources and an independent destination while masked VBROADCAST has the destination tied to the source. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372068 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 151 ++++++++++++++------ lib/Target/X86/X86InstrInfo.cpp | 85 +++++++---- test/CodeGen/X86/avx512-broadcast-unfold.ll | 3 +- 3 files changed, 159 insertions(+), 80 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 49b7b09ee2f..4127e4d5644 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1123,50 +1123,103 @@ multiclass avx512_broadcast_rm_split opc, string OpcodeStr, X86VectorVTInfo MaskInfo, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, + bit IsConvertibleToThreeAddress, SDPatternOperator UnmaskedOp = X86VBroadcast> { - let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in { - defm r : AVX512_maskable_split, - T8PD, EVEX, Sched<[SchedRR]>; - let mayLoad = 1 in - defm m : AVX512_maskable_split, - T8PD, EVEX, EVEX_CD8, - Sched<[SchedRM]>; - } + let hasSideEffects = 0 in + def r : AVX512PI, T8PD, EVEX, Sched<[SchedRR]>; + def rkz : AVX512PI, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; + let Constraints = "$src0 = $dst" in + def rk : AVX512PI, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>; + + let hasSideEffects = 0, mayLoad = 1 in + def m : AVX512PI, T8PD, EVEX, + EVEX_CD8, Sched<[SchedRM]>; + + def mkz : AVX512PI, T8PD, EVEX, EVEX_KZ, + EVEX_CD8, Sched<[SchedRM]>; + + let Constraints = "$src0 = $dst", + isConvertibleToThreeAddress = IsConvertibleToThreeAddress in + def mk : AVX512PI, T8PD, EVEX, EVEX_K, + EVEX_CD8, Sched<[SchedRM]>; } // Helper class to force mask 
and broadcast result to same type. multiclass avx512_broadcast_rm opc, string OpcodeStr, string Name, SchedWrite SchedRR, SchedWrite SchedRM, X86VectorVTInfo DestInfo, - X86VectorVTInfo SrcInfo> : + X86VectorVTInfo SrcInfo, + bit IsConvertibleToThreeAddress> : avx512_broadcast_rm_split; + DestInfo, DestInfo, SrcInfo, + IsConvertibleToThreeAddress>; multiclass avx512_fp_broadcast_sd opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info512, _.info128, 1>, avx512_broadcast_scalar, EVEX_V512; @@ -1174,7 +1227,7 @@ multiclass avx512_fp_broadcast_sd opc, string OpcodeStr, let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info256, _.info128, 1>, avx512_broadcast_scalar, EVEX_V256; @@ -1185,7 +1238,7 @@ multiclass avx512_fp_broadcast_ss opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info512, _.info128, 1>, avx512_broadcast_scalar, EVEX_V512; @@ -1193,12 +1246,12 @@ multiclass avx512_fp_broadcast_ss opc, string OpcodeStr, let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info256, _.info128, 1>, avx512_broadcast_scalar, EVEX_V256; defm Z128 : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info128, _.info128, 1>, avx512_broadcast_scalar, EVEX_V128; @@ -1283,30 +1336,34 @@ defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, X86VBroadcast, GR64, HasAVX512>, VEX_W; multiclass avx512_int_broadcast_rm_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + bit IsConvertibleToThreeAddress> { let Predicates = [prd] in { defm Z : avx512_broadcast_rm, + WriteShuffle256Ld, _.info512, _.info128, + IsConvertibleToThreeAddress>, EVEX_V512; } let Predicates = [prd, HasVLX] in { defm Z256 : avx512_broadcast_rm, + WriteShuffle256Ld, _.info256, _.info128, + 
IsConvertibleToThreeAddress>, EVEX_V256; defm Z128 : avx512_broadcast_rm, + WriteShuffleXLd, _.info128, _.info128, + IsConvertibleToThreeAddress>, EVEX_V128; } } defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", - avx512vl_i8_info, HasBWI>; + avx512vl_i8_info, HasBWI, 0>; defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", - avx512vl_i16_info, HasBWI>; + avx512vl_i16_info, HasBWI, 0>; defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", - avx512vl_i32_info, HasAVX512>; + avx512vl_i32_info, HasAVX512, 1>; defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", - avx512vl_i64_info, HasAVX512>, VEX_W1X; + avx512vl_i64_info, HasAVX512, 1>, VEX_W1X; multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { @@ -1612,12 +1669,12 @@ multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, let Predicates = [HasDQI] in defm Z : avx512_broadcast_rm_split, + _Src.info512, _Src.info128, 0, null_frag>, EVEX_V512; let Predicates = [HasDQI, HasVLX] in defm Z256 : avx512_broadcast_rm_split, + _Src.info256, _Src.info128, 0, null_frag>, EVEX_V256; } @@ -1628,7 +1685,7 @@ multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, let Predicates = [HasDQI, HasVLX] in defm Z128 : avx512_broadcast_rm_split, + _Src.info128, _Src.info128, 0, null_frag>, EVEX_V128; } @@ -1913,7 +1970,7 @@ multiclass WriteFVarBlendask opc, string OpcodeStr, } multiclass WriteFVarBlendask_rmb opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let mayLoad = 1, hasSideEffects = 0 in { + let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { def rmbk : AVX5128IThis Inner Loop Header: Depth=1 ; CHECK-NEXT: vcmpgtps 4096(%rdi,%rax), %ymm0, %k1 -; CHECK-NEXT: vmovaps %ymm1, %ymm2 -; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 {%k1} +; CHECK-NEXT: vblendmps {{.*}}(%rip){1to8}, %ymm1, %ymm2 {%k1} ; CHECK-NEXT: vmovups %ymm2, 4096(%rdi,%rax) ; CHECK-NEXT: addq 
$32, %rax ; CHECK-NEXT: jne .LBB126_1 -- 2.40.0