MVT VT = Op.getSimpleValueType();
// Vectors containing all zeros can be matched by pxor and xorps.
- if (ISD::isBuildVectorAllZeros(Op.getNode())) {
-   // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
-   // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
-   if (VT.isFloatingPoint() ||
-       VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
-     return Op;
-
-   return getZeroVector(VT, Subtarget, DAG, DL);
- }
+ if (ISD::isBuildVectorAllZeros(Op.getNode()))
+   return Op;
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
}
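; A minimal IR sketch (hypothetical, for illustration; not part of the patch):
; with the <4 x i32> canonicalization removed, an integer zero vector reaches
; instruction selection at its original type, so the immAllZerosV patterns
; added below must cover every legal vector type, not just the vXi32 and
; floating-point types that previously survived canonicalization.
define <2 x i64> @zero_v2i64() {
  ret <2 x i64> zeroinitializer
}
; Selection is expected to be unchanged: a single register-zeroing xor idiom
; (xorps/pxor/vxorps depending on the subtarget), now reached via the new
; v2i64 V_SET0 pattern rather than a bitcast from v4i32.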
let Predicates = [HasAVX512] in {
+def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
}
let Predicates = [HasAVX512] in {
+def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}
}
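; A minimal sketch (hypothetical, for illustration) of what the new AVX512
; patterns select: integer zero vectors at 128-, 256-, and 512-bit widths now
; map directly to the matching SET0 pseudo instead of first being rewritten
; as vXi32 build vectors.
define <8 x i64> @zero_v8i64() {
  ret <8 x i64> zeroinitializer
}
; With -mattr=+avx512f, AVX512_512_SET0 is expected to expand to a single
; zeroing idiom (e.g. vpxord %zmm0, %zmm0, %zmm0, or a 128-bit xor whose
; implicit zero-extension covers the full zmm register).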
let Predicates = [NoAVX512] in {
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
}
}
let Predicates = [NoAVX512] in {
+def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
}
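; A minimal sketch (hypothetical, for illustration) of the non-AVX512 side:
; a 256-bit integer zero under plain AVX/AVX2 now matches AVX_SET0 directly.
define <4 x i64> @zero_v4i64() {
  ret <4 x i64> zeroinitializer
}
; With -mattr=+avx, llc is expected to emit a single vxorps zeroing idiom;
; the 128-bit form suffices because writing an xmm register implicitly
; zeroes the upper ymm bits.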
define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
; AVX512-LABEL: PR41066:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x57,0xc0]
+; AVX512-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512-NEXT: vcmpltpd %zmm1, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc9,0x01]
+; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc0]
+; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: PR41066:
; SKX: ## %bb.0:
-; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
+; SKX-NEXT: vcmpltpd %xmm1, %xmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x08,0xc2,0xc9,0x01]
+; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; SKX-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%t1 = fcmp ogt <2 x double> %x, %y
%t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer
ret <2 x i64> %t2
}
define <16 x i8> @constant_fold_pshufb_2() {
; SSE-LABEL: constant_fold_pshufb_2:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; SSE-NEXT: movl $2, %eax
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: constant_fold_pshufb_2:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX-NEXT: movl $2, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 2, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
ret <16 x i8> %1
}