DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
-multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
+// Split version to allow mask and broadcast node to be different types. This
+// helps support the 32x2 broadcasts.
+//
+// MaskInfo supplies the result register class, result type and write-mask
+// class; DestInfo is the type of the X86VBroadcast node, which is bitconverted
+// to MaskInfo.VT; SrcInfo describes the register or scalar memory source.
+// The trailing patterns map a broadcast of a scalar_to_vector'd scalar load
+// onto the memory forms (m, mk, mkz).
+multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo MaskInfo,
+ X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> {
let ExeDomain = DestInfo.ExeDomain in {
- defm r : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+ defm r : AVX512_maskable<opc, MRMSrcReg, MaskInfo, (outs MaskInfo.RC:$dst),
(ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
- (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
T8PD, EVEX;
- defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+ defm m : AVX512_maskable<opc, MRMSrcMem, MaskInfo, (outs MaskInfo.RC:$dst),
(ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
- (DestInfo.VT (X86VBroadcast
- (SrcInfo.ScalarLdFrag addr:$src)))>,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT (X86VBroadcast
+ (SrcInfo.ScalarLdFrag addr:$src)))))>,
T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
}
- def : Pat<(DestInfo.VT (X86VBroadcast
- (SrcInfo.VT (scalar_to_vector
- (SrcInfo.ScalarLdFrag addr:$src))))),
- (!cast<Instruction>(NAME#DestInfo.ZSuffix#m) addr:$src)>;
- def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
- (X86VBroadcast
- (SrcInfo.VT (scalar_to_vector
- (SrcInfo.ScalarLdFrag addr:$src)))),
- DestInfo.RC:$src0)),
+ def : Pat<(MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT (X86VBroadcast
+ (SrcInfo.VT (scalar_to_vector
+ (SrcInfo.ScalarLdFrag addr:$src))))))),
+ (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;
+ def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
+ (bitconvert
+ (DestInfo.VT
+ (X86VBroadcast
+ (SrcInfo.VT (scalar_to_vector
+ (SrcInfo.ScalarLdFrag addr:$src)))))),
+ MaskInfo.RC:$src0)),
- (!cast<Instruction>(NAME#DestInfo.ZSuffix#mk)
+ (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mk)
- DestInfo.RC:$src0, DestInfo.KRCWM:$mask, addr:$src)>;
- def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
- (X86VBroadcast
- (SrcInfo.VT (scalar_to_vector
- (SrcInfo.ScalarLdFrag addr:$src)))),
- DestInfo.ImmAllZerosV)),
- (!cast<Instruction>(NAME#DestInfo.ZSuffix#mkz)
- DestInfo.KRCWM:$mask, addr:$src)>;
-}
+ MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
+ def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
+ (bitconvert
+ (DestInfo.VT
+ (X86VBroadcast
+ (SrcInfo.VT (scalar_to_vector
+ (SrcInfo.ScalarLdFrag addr:$src)))))),
+ MaskInfo.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
+ MaskInfo.KRCWM:$mask, addr:$src)>;
+}
+
+// Helper multiclass to force mask and broadcast result to the same type: it
+// forwards to avx512_broadcast_rm_split, passing DestInfo as both the mask
+// info and the broadcast node info.
+multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> :
+ avx512_broadcast_rm_split<opc, OpcodeStr, DestInfo, DestInfo, SrcInfo>;
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
+// Defines the 512-bit (Z, requires HasDQI) and 256-bit (Z256, requires HasDQI
+// and HasVLX) variants. The mask/result type is taken from _Dst, the broadcast
+// node type from _Src at the same width, and the source operand is
+// _Src.info128.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
let Predicates = [HasDQI] in
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info512, _Src.info128>,
- EVEX_V512;
+ defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, _Dst.info512,
+ _Src.info512, _Src.info128>,
+ EVEX_V512;
let Predicates = [HasDQI, HasVLX] in
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info256, _Src.info128>,
- EVEX_V256;
+ defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, _Dst.info256,
+ _Src.info256, _Src.info128>,
+ EVEX_V256;
}
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
let Predicates = [HasDQI, HasVLX] in
- defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info128, _Src.info128>,
- EVEX_V128;
+ defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, _Dst.info128,
+ _Src.info128, _Src.info128>,
+ EVEX_V128;
}
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
%res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer
ret <8 x i64> %res
}
+
+; Merge-masked <4 x float> shuffle with mask <0,1,0,1>; only the low 4 bits of
+; the i8 mask are used. The expected code is vmovddup followed by a masked
+; vblendmps, not vbroadcastf32x2.
+define <4 x float> @test_broadcastf32x2_v4f32(<4 x float> %vec, <4 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: test_broadcastf32x2_v4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> %passthru
+ ret <4 x float> %res
+}
+
+; Zero-masked <4 x float> shuffle with mask <0,1,0,1>; only the low 4 bits of
+; the i8 mask are used. The expected code is vmovddup followed by a zero-masked
+; vmovaps, not vbroadcastf32x2.
+define <4 x float> @test_broadcastf32x2_v4f32_z(<4 x float> %vec, i8 %mask) {
+; CHECK-LABEL: test_broadcastf32x2_v4f32_z:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> zeroinitializer
+ ret <4 x float> %res
+}
+
+; Merge-masked <4 x i32> shuffle with mask <0,1,0,1>; only the low 4 bits of
+; the i8 mask are used. Expects a masked vbroadcasti32x2 into the passthru
+; register, then a copy to the return register.
+define <4 x i32> @test_broadcasti32x2_v4i32(<4 x i32> %vec, <4 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: test_broadcasti32x2_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> %passthru
+ ret <4 x i32> %res
+}
+
+; Zero-masked <4 x i32> shuffle with mask <0,1,0,1>; only the low 4 bits of
+; the i8 mask are used. Expects a single zero-masked vbroadcasti32x2.
+define <4 x i32> @test_broadcasti32x2_v4i32_z(<4 x i32> %vec, i8 %mask) {
+; CHECK-LABEL: test_broadcasti32x2_v4i32_z:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ ret <4 x i32> %res
+}
+
+; Merge-masked <8 x float> shuffle repeating elements 0,1 four times, selected
+; against %passthru with an 8-bit mask. Expects a masked vbroadcastf32x2 into
+; the passthru register, then a copy to the return register.
+define <8 x float> @test_broadcastf32x2_v8f32(<8 x float> %vec, <8 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: test_broadcastf32x2_v8f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vmovapd %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> %passthru
+ ret <8 x float> %res
+}
+
+; Zero-masked <8 x float> shuffle repeating elements 0,1 four times, with an
+; 8-bit mask. Expects a single zero-masked vbroadcastf32x2.
+define <8 x float> @test_broadcastf32x2_v8f32_z(<8 x float> %vec, i8 %mask) {
+; CHECK-LABEL: test_broadcastf32x2_v8f32_z:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: retq
+ %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> zeroinitializer
+ ret <8 x float> %res
+}
+
+; Merge-masked <8 x i32> shuffle repeating elements 0,1 four times, selected
+; against %passthru with an 8-bit mask. Expects a masked vbroadcasti32x2 into
+; the passthru register, then a copy to the return register.
+define <8 x i32> @test_broadcasti32x2_v8i32(<8 x i32> %vec, <8 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: test_broadcasti32x2_v8i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> %passthru
+ ret <8 x i32> %res
+}
+
+; Zero-masked <8 x i32> shuffle repeating elements 0,1 four times, with an
+; 8-bit mask. Expects a single zero-masked vbroadcasti32x2.
+define <8 x i32> @test_broadcasti32x2_v8i32_z(<8 x i32> %vec, i8 %mask) {
+; CHECK-LABEL: test_broadcasti32x2_v8i32_z:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: retq
+ %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ ret <8 x i32> %res
+}
+
+; Zero-masked <16 x float> shuffle repeating elements 0,1 eight times, with a
+; 16-bit mask. Expects a single zero-masked vbroadcastf32x2 on zmm.
+define <16 x float> @test_broadcastf32x2_v16f32_z(<16 x float> %vec, i16 %mask) {
+; CHECK-LABEL: test_broadcastf32x2_v16f32_z:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: retq
+ %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> zeroinitializer
+ ret <16 x float> %res
+}
+
+; Merge-masked <16 x i32> shuffle repeating elements 0,1 eight times, selected
+; against %passthru with a 16-bit mask. Expects a masked vbroadcasti32x2 into
+; the passthru register, then a copy to the return register.
+define <16 x i32> @test_broadcasti32x2_v16i32(<16 x i32> %vec, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: test_broadcasti32x2_v16i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> %passthru
+ ret <16 x i32> %res
+}
+
+; Merge-masked <16 x float> shuffle repeating elements 0,1 eight times,
+; selected against %passthru with a 16-bit mask. Expects a masked
+; vbroadcastf32x2 into the passthru register, then a copy to the return
+; register.
+define <16 x float> @test_broadcastf32x2_v16f32(<16 x float> %vec, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: test_broadcastf32x2_v16f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> %passthru
+ ret <16 x float> %res
+}
+
+; Zero-masked <16 x i32> shuffle repeating elements 0,1 eight times, with a
+; 16-bit mask. Expects a single zero-masked vbroadcasti32x2 on zmm.
+define <16 x i32> @test_broadcasti32x2_v16i32_z(<16 x i32> %vec, i16 %mask) {
+; CHECK-LABEL: test_broadcasti32x2_v16i32_z:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: retq
+ %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ ret <16 x i32> %res
+}