From dd23d7ede65336e1fa27cee850359754bbcd9af9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 30 Jan 2017 06:59:06 +0000 Subject: [PATCH] [AVX-512] Remove duplicate CodeGenOnly patterns for scalar register broadcast. We can use COPY_TO_REGCLASS like AVX does. This causes stack spill slots to be oversized sometimes, but the same should already be happening with AVX. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293464 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 40 ++++++++--------------- lib/Target/X86/X86InstrInfo.cpp | 5 --- lib/Target/X86/X86InstrTablesInfo.h | 3 -- test/CodeGen/X86/avx512-vbroadcast.ll | 22 ++++++------- test/CodeGen/X86/avx512vl-vbroadcast.ll | 30 ++++++++--------- test/CodeGen/X86/evex-to-vex-compress.mir | 12 ------- 6 files changed, 40 insertions(+), 72 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 2a2ab660c91..d58a93eed85 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -846,32 +846,20 @@ def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), // broadcast with a scalar argument. 
multiclass avx512_broadcast_scalar opc, string OpcodeStr, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> { - - let isCodeGenOnly = 1 in { - def r_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst), - (ins SrcInfo.FRC:$src), OpcodeStr#"\t{$src, $dst|$dst, $src}", - [(set DestInfo.RC:$dst, (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)))]>, - Requires<[HasAVX512]>, T8PD, EVEX; - - let Constraints = "$src0 = $dst" in - def rk_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst), - (ins DestInfo.RC:$src0, DestInfo.KRCWM:$mask, SrcInfo.FRC:$src), - OpcodeStr#"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", - [(set DestInfo.RC:$dst, - (vselect DestInfo.KRCWM:$mask, - (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)), - DestInfo.RC:$src0))]>, - Requires<[HasAVX512]>, T8PD, EVEX, EVEX_K; - - def rkz_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst), - (ins DestInfo.KRCWM:$mask, SrcInfo.FRC:$src), - OpcodeStr#"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", - [(set DestInfo.RC:$dst, - (vselect DestInfo.KRCWM:$mask, - (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)), - DestInfo.ImmAllZerosV))]>, - Requires<[HasAVX512]>, T8PD, EVEX, EVEX_KZ; - } // let isCodeGenOnly = 1 in + def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)), + (!cast(NAME#DestInfo.ZSuffix#r) + (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>; + def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask, + (X86VBroadcast SrcInfo.FRC:$src), + DestInfo.RC:$src0)), + (!cast(NAME#DestInfo.ZSuffix#rk) + DestInfo.RC:$src0, DestInfo.KRCWM:$mask, + (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>; + def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask, + (X86VBroadcast SrcInfo.FRC:$src), + DestInfo.ImmAllZerosV)), + (!cast(NAME#DestInfo.ZSuffix#rkz) + DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>; } multiclass avx512_broadcast_rm opc, string OpcodeStr, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 46a7604ec69..856fbf78037 100644 --- 
a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -867,9 +867,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 foldable instructions { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE }, - { X86::VBROADCASTSSZr_s, X86::VBROADCASTSSZm, TB_NO_REVERSE }, { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE }, - { X86::VBROADCASTSDZr_s, X86::VBROADCASTSDZm, TB_NO_REVERSE }, { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE }, { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, @@ -907,9 +905,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 foldable instructions (256-bit versions) { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, - { X86::VBROADCASTSSZ256r_s, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, - { X86::VBROADCASTSDZ256r_s, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 }, { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 }, @@ -942,7 +938,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 foldable instructions (128-bit versions) { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, - { X86::VBROADCASTSSZ128r_s, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 }, { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 }, diff --git a/lib/Target/X86/X86InstrTablesInfo.h b/lib/Target/X86/X86InstrTablesInfo.h index 415a891bfd9..09e635c9dff 100755 --- a/lib/Target/X86/X86InstrTablesInfo.h +++ b/lib/Target/X86/X86InstrTablesInfo.h @@ -296,7 +296,6 @@ static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = { { X86::VANDPSZ128rr , X86::VANDPSrr }, { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm }, { X86::VBROADCASTSSZ128r 
, X86::VBROADCASTSSrr }, - { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr }, { X86::VCVTDQ2PDZ128rm , X86::VCVTDQ2PDrm }, { X86::VCVTDQ2PDZ128rr , X86::VCVTDQ2PDrr }, { X86::VCVTDQ2PSZ128rm , X86::VCVTDQ2PSrm }, @@ -727,10 +726,8 @@ static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = { { X86::VANDPSZ256rr , X86::VANDPSYrr }, { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, - { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm }, { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, - { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr }, { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm }, { X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr }, { X86::VCVTDQ2PSZ256rm , X86::VCVTDQ2PSYrm }, diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll b/test/CodeGen/X86/avx512-vbroadcast.ll index 1991ee4f337..350c0d7873e 100644 --- a/test/CodeGen/X86/avx512-vbroadcast.ll +++ b/test/CodeGen/X86/avx512-vbroadcast.ll @@ -128,7 +128,7 @@ define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %m ; ALL-NEXT: vpxor %ymm3, %ymm3, %ymm3 ; ALL-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 ; ALL-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} -; ALL-NEXT: vmovaps %zmm1, %zmm0 +; ALL-NEXT: vmovapd %zmm1, %zmm0 ; ALL-NEXT: retq %mask = icmp ne <8 x i32> %mask1, zeroinitializer %b = insertelement <8 x double> undef, double %a, i32 0 @@ -406,14 +406,14 @@ declare void @func_f32(float) define <16 x float> @broadcast_ss_spill(float %x) { ; ALL-LABEL: broadcast_ss_spill: ; ALL: # BB#0: -; ALL-NEXT: pushq %rax +; ALL-NEXT: subq $24, %rsp ; ALL-NEXT: .Lcfi0: -; ALL-NEXT: .cfi_def_cfa_offset 16 +; ALL-NEXT: .cfi_def_cfa_offset 32 ; ALL-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; ALL-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill +; ALL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; ALL-NEXT: callq func_f32 -; ALL-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %zmm0 # 4-byte Folded Reload -; ALL-NEXT: popq 
%rax +; ALL-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload +; ALL-NEXT: addq $24, %rsp ; ALL-NEXT: retq %a = fadd float %x, %x call void @func_f32(float %a) @@ -426,14 +426,14 @@ declare void @func_f64(double) define <8 x double> @broadcast_sd_spill(double %x) { ; ALL-LABEL: broadcast_sd_spill: ; ALL: # BB#0: -; ALL-NEXT: pushq %rax +; ALL-NEXT: subq $24, %rsp ; ALL-NEXT: .Lcfi1: -; ALL-NEXT: .cfi_def_cfa_offset 16 +; ALL-NEXT: .cfi_def_cfa_offset 32 ; ALL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 -; ALL-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill +; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; ALL-NEXT: callq func_f64 -; ALL-NEXT: vbroadcastsd (%rsp), %zmm0 # 8-byte Folded Reload -; ALL-NEXT: popq %rax +; ALL-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload +; ALL-NEXT: addq $24, %rsp ; ALL-NEXT: retq %a = fadd double %x, %x call void @func_f64(double %a) diff --git a/test/CodeGen/X86/avx512vl-vbroadcast.ll b/test/CodeGen/X86/avx512vl-vbroadcast.ll index d5204122000..38a461ff0be 100644 --- a/test/CodeGen/X86/avx512vl-vbroadcast.ll +++ b/test/CodeGen/X86/avx512vl-vbroadcast.ll @@ -5,14 +5,14 @@ declare void @func_f32(float) define <8 x float> @_256_broadcast_ss_spill(float %x) { ; CHECK-LABEL: _256_broadcast_ss_spill: ; CHECK: # BB#0: -; CHECK-NEXT: pushq %rax +; CHECK-NEXT: subq $24, %rsp ; CHECK-NEXT: .Lcfi0: -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: callq func_f32 -; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0 # 4-byte Folded Reload -; CHECK-NEXT: popq %rax +; CHECK-NEXT: vbroadcastss (%rsp), %ymm0 # 16-byte Folded Reload +; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: retq %a = fadd float %x, %x call void @func_f32(float %a) @@ -24,14 +24,14 @@ define <8 x float> @_256_broadcast_ss_spill(float %x) { define <4 x float> @_128_broadcast_ss_spill(float 
%x) { ; CHECK-LABEL: _128_broadcast_ss_spill: ; CHECK: # BB#0: -; CHECK-NEXT: pushq %rax +; CHECK-NEXT: subq $24, %rsp ; CHECK-NEXT: .Lcfi1: -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: callq func_f32 -; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload -; CHECK-NEXT: popq %rax +; CHECK-NEXT: vbroadcastss (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: retq %a = fadd float %x, %x call void @func_f32(float %a) @@ -44,14 +44,14 @@ declare void @func_f64(double) define <4 x double> @_256_broadcast_sd_spill(double %x) { ; CHECK-LABEL: _256_broadcast_sd_spill: ; CHECK: # BB#0: -; CHECK-NEXT: pushq %rax +; CHECK-NEXT: subq $24, %rsp ; CHECK-NEXT: .Lcfi2: -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: vaddsd %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill +; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: callq func_f64 -; CHECK-NEXT: vbroadcastsd (%rsp), %ymm0 # 8-byte Folded Reload -; CHECK-NEXT: popq %rax +; CHECK-NEXT: vbroadcastsd (%rsp), %ymm0 # 16-byte Folded Reload +; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: retq %a = fadd double %x, %x call void @func_f64(double %a) diff --git a/test/CodeGen/X86/evex-to-vex-compress.mir b/test/CodeGen/X86/evex-to-vex-compress.mir index 043f3a38aa7..099189119ed 100755 --- a/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/test/CodeGen/X86/evex-to-vex-compress.mir @@ -691,14 +691,10 @@ body: | %ymm0 = VBROADCASTSDZ256m %rip, 1, _, %rax, _ ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0 %ymm0 = VBROADCASTSDZ256r %xmm0 - ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0 - %ymm0 = VBROADCASTSDZ256r_s %xmm0 ; CHECK: %ymm0 = VBROADCASTSSYrm %rip, 1, _, %rax, _ %ymm0 = VBROADCASTSSZ256m %rip, 1, _, %rax, _ ; CHECK: %ymm0 = 
VBROADCASTSSYrr %xmm0 %ymm0 = VBROADCASTSSZ256r %xmm0 - ; CHECK: %ymm0 = VBROADCASTSSYrr %xmm0 - %ymm0 = VBROADCASTSSZ256r_s %xmm0 ; CHECK: %ymm0 = VPBROADCASTBYrm %rip, 1, _, %rax, _ %ymm0 = VPBROADCASTBZ256m %rip, 1, _, %rax, _ ; CHECK: %ymm0 = VPBROADCASTBYrr %xmm0 @@ -1695,8 +1691,6 @@ body: | %xmm0 = VBROADCASTSSZ128m %rip, _, _, _, _ ; CHECK: %xmm0 = VBROADCASTSSrr %xmm0 %xmm0 = VBROADCASTSSZ128r %xmm0 - ; CHECK: %xmm0 = VBROADCASTSSrr %xmm0 - %xmm0 = VBROADCASTSSZ128r_s %xmm0 ; CHECK: %xmm0 = VPBROADCASTBrm %rip, _, _, _, _ %xmm0 = VPBROADCASTBZ128m %rip, _, _, _, _ ; CHECK: %xmm0 = VPBROADCASTBrr %xmm0 @@ -2928,14 +2922,10 @@ body: | %ymm16 = VBROADCASTSDZ256m %rip, 1, _, %rax, _ ; CHECK: %ymm16 = VBROADCASTSDZ256r %xmm0 %ymm16 = VBROADCASTSDZ256r %xmm0 - ; CHECK: %ymm16 = VBROADCASTSDZ256r_s %xmm0 - %ymm16 = VBROADCASTSDZ256r_s %xmm0 ; CHECK: %ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _ %ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _ ; CHECK: %ymm16 = VBROADCASTSSZ256r %xmm0 %ymm16 = VBROADCASTSSZ256r %xmm0 - ; CHECK: %ymm16 = VBROADCASTSSZ256r_s %xmm0 - %ymm16 = VBROADCASTSSZ256r_s %xmm0 ; CHECK: %ymm16 = VPBROADCASTBZ256m %rip, 1, _, %rax, _ %ymm16 = VPBROADCASTBZ256m %rip, 1, _, %rax, _ ; CHECK: %ymm16 = VPBROADCASTBZ256r %xmm0 @@ -3932,8 +3922,6 @@ body: | %xmm16 = VBROADCASTSSZ128m %rip, _, _, _, _ ; CHECK: %xmm16 = VBROADCASTSSZ128r %xmm16 %xmm16 = VBROADCASTSSZ128r %xmm16 - ; CHECK: %xmm16 = VBROADCASTSSZ128r_s %xmm16 - %xmm16 = VBROADCASTSSZ128r_s %xmm16 ; CHECK: %xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _ %xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _ ; CHECK: %xmm16 = VPBROADCASTBZ128r %xmm16 -- 2.50.1