// Broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
-
- let isCodeGenOnly = 1 in {
- def r_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins SrcInfo.FRC:$src), OpcodeStr#"\t{$src, $dst|$dst, $src}",
- [(set DestInfo.RC:$dst, (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)))]>,
- Requires<[HasAVX512]>, T8PD, EVEX;
-
- let Constraints = "$src0 = $dst" in
- def rk_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins DestInfo.RC:$src0, DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
- OpcodeStr#"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set DestInfo.RC:$dst,
- (vselect DestInfo.KRCWM:$mask,
- (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- DestInfo.RC:$src0))]>,
- Requires<[HasAVX512]>, T8PD, EVEX, EVEX_K;
-
- def rkz_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
- OpcodeStr#"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set DestInfo.RC:$dst,
- (vselect DestInfo.KRCWM:$mask,
- (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- DestInfo.ImmAllZerosV))]>,
- Requires<[HasAVX512]>, T8PD, EVEX, EVEX_KZ;
- } // let isCodeGenOnly = 1 in
+ def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
+ (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
+ def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.RC:$src0)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
+ DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
+ (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
+ def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
+ DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
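
The three patterns above replace the removed isCodeGenOnly _s instructions: instead of a pseudo that takes an FR32X/FR64X operand directly, the scalar source is copied into the vector register class with COPY_TO_REGCLASS and the ordinary r/rk/rkz broadcast forms are selected. As a minimal sketch (the function name is illustrative), IR like the following now matches the first, unmasked pattern; because an FR32X value already lives in the low lane of an XMM register, the COPY_TO_REGCLASS is normally a no-op copy after register allocation:

define <16 x float> @bcast_ss(float %x) {
  %ins = insertelement <16 x float> undef, float %x, i32 0
  %vec = shufflevector <16 x float> %ins, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %vec
}

; Expected lowering (compare the spill tests below):
;   vbroadcastss %xmm0, %zmm0
;   retq
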
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
// AVX-512 foldable instructions
{ X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
- { X86::VBROADCASTSSZr_s, X86::VBROADCASTSSZm, TB_NO_REVERSE },
{ X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZr_s, X86::VBROADCASTSDZm, TB_NO_REVERSE },
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
{ X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
{ X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
// AVX-512 foldable instructions (256-bit versions)
{ X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256r_s, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256r_s, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
// AVX-512 foldable instructions (128-bit versions)
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128r_s, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
{ X86::VANDPSZ128rr , X86::VANDPSrr },
{ X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
{ X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
- { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr },
{ X86::VCVTDQ2PDZ128rm , X86::VCVTDQ2PDrm },
{ X86::VCVTDQ2PDZ128rr , X86::VCVTDQ2PDrr },
{ X86::VCVTDQ2PSZ128rm , X86::VCVTDQ2PSrm },
{ X86::VANDPSZ256rr , X86::VANDPSYrr },
{ X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
{ X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
- { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
{ X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
{ X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
- { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr },
{ X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
{ X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr },
{ X86::VCVTDQ2PSZ256rm , X86::VCVTDQ2PSYrm },
; ALL-NEXT: vpxor %ymm3, %ymm3, %ymm3
; ALL-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; ALL-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
-; ALL-NEXT: vmovaps %zmm1, %zmm0
+; ALL-NEXT: vmovapd %zmm1, %zmm0
; ALL-NEXT: retq
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%b = insertelement <8 x double> undef, double %a, i32 0
define <16 x float> @broadcast_ss_spill(float %x) {
; ALL-LABEL: broadcast_ss_spill:
; ALL: # BB#0:
-; ALL-NEXT: pushq %rax
+; ALL-NEXT: subq $24, %rsp
; ALL-NEXT: .Lcfi0:
-; ALL-NEXT: .cfi_def_cfa_offset 16
+; ALL-NEXT: .cfi_def_cfa_offset 32
; ALL-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; ALL-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; ALL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; ALL-NEXT: callq func_f32
-; ALL-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %zmm0 # 4-byte Folded Reload
-; ALL-NEXT: popq %rax
+; ALL-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload
+; ALL-NEXT: addq $24, %rsp
; ALL-NEXT: retq
%a = fadd float %x, %x
call void @func_f32(float %a)
define <8 x double> @broadcast_sd_spill(double %x) {
; ALL-LABEL: broadcast_sd_spill:
; ALL: # BB#0:
-; ALL-NEXT: pushq %rax
+; ALL-NEXT: subq $24, %rsp
; ALL-NEXT: .Lcfi1:
-; ALL-NEXT: .cfi_def_cfa_offset 16
+; ALL-NEXT: .cfi_def_cfa_offset 32
; ALL-NEXT: vaddsd %xmm0, %xmm0, %xmm0
-; ALL-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
+; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; ALL-NEXT: callq func_f64
-; ALL-NEXT: vbroadcastsd (%rsp), %zmm0 # 8-byte Folded Reload
-; ALL-NEXT: popq %rax
+; ALL-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload
+; ALL-NEXT: addq $24, %rsp
; ALL-NEXT: retq
%a = fadd double %x, %x
call void @func_f64(double %a)
define <8 x float> @_256_broadcast_ss_spill(float %x) {
; CHECK-LABEL: _256_broadcast_ss_spill:
; CHECK: # BB#0:
-; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .Lcfi0:
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq func_f32
-; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0 # 4-byte Folded Reload
-; CHECK-NEXT: popq %rax
+; CHECK-NEXT: vbroadcastss (%rsp), %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
%a = fadd float %x, %x
call void @func_f32(float %a)
define <4 x float> @_128_broadcast_ss_spill(float %x) {
; CHECK-LABEL: _128_broadcast_ss_spill:
; CHECK: # BB#0:
-; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .Lcfi1:
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq func_f32
-; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
-; CHECK-NEXT: popq %rax
+; CHECK-NEXT: vbroadcastss (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
%a = fadd float %x, %x
call void @func_f32(float %a)
define <4 x double> @_256_broadcast_sd_spill(double %x) {
; CHECK-LABEL: _256_broadcast_sd_spill:
; CHECK: # BB#0:
-; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .Lcfi2:
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: vaddsd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq func_f64
-; CHECK-NEXT: vbroadcastsd (%rsp), %ymm0 # 8-byte Folded Reload
-; CHECK-NEXT: popq %rax
+; CHECK-NEXT: vbroadcastsd (%rsp), %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
%a = fadd double %x, %x
call void @func_f64(double %a)
%ymm0 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0
%ymm0 = VBROADCASTSDZ256r %xmm0
- ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0
- %ymm0 = VBROADCASTSDZ256r_s %xmm0
; CHECK: %ymm0 = VBROADCASTSSYrm %rip, 1, _, %rax, _
%ymm0 = VBROADCASTSSZ256m %rip, 1, _, %rax, _
; CHECK: %ymm0 = VBROADCASTSSYrr %xmm0
%ymm0 = VBROADCASTSSZ256r %xmm0
- ; CHECK: %ymm0 = VBROADCASTSSYrr %xmm0
- %ymm0 = VBROADCASTSSZ256r_s %xmm0
; CHECK: %ymm0 = VPBROADCASTBYrm %rip, 1, _, %rax, _
%ymm0 = VPBROADCASTBZ256m %rip, 1, _, %rax, _
; CHECK: %ymm0 = VPBROADCASTBYrr %xmm0
%xmm0 = VBROADCASTSSZ128m %rip, _, _, _, _
; CHECK: %xmm0 = VBROADCASTSSrr %xmm0
%xmm0 = VBROADCASTSSZ128r %xmm0
- ; CHECK: %xmm0 = VBROADCASTSSrr %xmm0
- %xmm0 = VBROADCASTSSZ128r_s %xmm0
; CHECK: %xmm0 = VPBROADCASTBrm %rip, _, _, _, _
%xmm0 = VPBROADCASTBZ128m %rip, _, _, _, _
; CHECK: %xmm0 = VPBROADCASTBrr %xmm0
%ymm16 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
; CHECK: %ymm16 = VBROADCASTSDZ256r %xmm0
%ymm16 = VBROADCASTSDZ256r %xmm0
- ; CHECK: %ymm16 = VBROADCASTSDZ256r_s %xmm0
- %ymm16 = VBROADCASTSDZ256r_s %xmm0
; CHECK: %ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _
%ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _
; CHECK: %ymm16 = VBROADCASTSSZ256r %xmm0
%ymm16 = VBROADCASTSSZ256r %xmm0
- ; CHECK: %ymm16 = VBROADCASTSSZ256r_s %xmm0
- %ymm16 = VBROADCASTSSZ256r_s %xmm0
; CHECK: %ymm16 = VPBROADCASTBZ256m %rip, 1, _, %rax, _
%ymm16 = VPBROADCASTBZ256m %rip, 1, _, %rax, _
; CHECK: %ymm16 = VPBROADCASTBZ256r %xmm0
%xmm16 = VBROADCASTSSZ128m %rip, _, _, _, _
; CHECK: %xmm16 = VBROADCASTSSZ128r %xmm16
%xmm16 = VBROADCASTSSZ128r %xmm16
- ; CHECK: %xmm16 = VBROADCASTSSZ128r_s %xmm16
- %xmm16 = VBROADCASTSSZ128r_s %xmm16
; CHECK: %xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _
%xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _
; CHECK: %xmm16 = VPBROADCASTBZ128r %xmm16