}
if (SExtVT == MVT::v8i16) {
- V = DAG.getBitcast(MVT::v16i8, V);
- V = DAG.getVectorShuffle(
- MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8),
- {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
+ assert(16 == DAG.ComputeNumSignBits(V) && "Expected all/none bit vector");
+ V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
+ DAG.getUNDEF(MVT::v8i16));
} else
assert(SExtVT.getScalarType() != MVT::i16 &&
- "Vectors of i16 must be shuffled");
+ "Vectors of i16 must be packed");
if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
V = DAG.getBitcast(FPCastVT, V);
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512
define i8 @v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
; NOTE(review): the lines below are an UNAPPLIED diff hunk — lines starting
; with '-' are the old separate SSE2/SSSE3 check blocks (pand+packuswb /
; pshufb truncation) and lines starting with '+' are the merged SSE2-SSSE3
; block that checks the new packsswb-based lowering. The raw '-'/'+'
; prefixes are not valid IR or FileCheck text; confirm the patch was meant
; to be applied, not committed verbatim. The IR body of this function is
; not visible in this chunk.
-; SSE2-LABEL: v8i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
-; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
-; SSE2-NEXT: packuswb %xmm2, %xmm2
-; SSE2-NEXT: pmovmskb %xmm2, %eax
-; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSSE3-LABEL: v8i16:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
-; SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
-; SSSE3-NEXT: pand %xmm0, %xmm2
-; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pmovmskb %xmm2, %eax
-; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSSE3-NEXT: ret{{[l|q]}}
+; SSE2-SSSE3-LABEL: v8i16:
+; SSE2-SSSE3: # BB#0:
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
+; SSE2-SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE2-SSSE3-NEXT: ret{{[l|q]}}
;
; AVX12-LABEL: v8i16:
; AVX12: # BB#0:
; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: ret{{[l|q]}}
}
define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; NOTE(review): unapplied diff hunk — '-' lines are the old SSE2/SSSE3
; check blocks, '+' lines the merged SSE2-SSSE3 block checking packsswb
; instead of pand+packuswb / pshufb. The psllw/psraw pairs in the checks
; sign-extend the i8 lanes inside i16 elements before the compare. The
; '-'/'+' prefixes are not valid IR/FileCheck text; the IR body of this
; function is not visible in this chunk — confirm against the real file.
-; SSE2-LABEL: v8i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: psllw $8, %xmm3
-; SSE2-NEXT: psraw $8, %xmm3
-; SSE2-NEXT: psllw $8, %xmm2
-; SSE2-NEXT: psraw $8, %xmm2
-; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
-; SSE2-NEXT: psllw $8, %xmm1
-; SSE2-NEXT: psraw $8, %xmm1
-; SSE2-NEXT: psllw $8, %xmm0
-; SSE2-NEXT: psraw $8, %xmm0
-; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: packuswb %xmm0, %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSSE3-LABEL: v8i8:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: psllw $8, %xmm3
-; SSSE3-NEXT: psraw $8, %xmm3
-; SSSE3-NEXT: psllw $8, %xmm2
-; SSSE3-NEXT: psraw $8, %xmm2
-; SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
-; SSSE3-NEXT: psllw $8, %xmm1
-; SSSE3-NEXT: psraw $8, %xmm1
-; SSSE3-NEXT: psllw $8, %xmm0
-; SSSE3-NEXT: psraw $8, %xmm0
-; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
-; SSSE3-NEXT: pand %xmm2, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pmovmskb %xmm0, %eax
-; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSSE3-NEXT: ret{{[l|q]}}
+; SSE2-SSSE3-LABEL: v8i8:
+; SSE2-SSSE3: # BB#0:
+; SSE2-SSSE3-NEXT: psllw $8, %xmm3
+; SSE2-SSSE3-NEXT: psraw $8, %xmm3
+; SSE2-SSSE3-NEXT: psllw $8, %xmm2
+; SSE2-SSSE3-NEXT: psraw $8, %xmm2
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: psllw $8, %xmm1
+; SSE2-SSSE3-NEXT: psraw $8, %xmm1
+; SSE2-SSSE3-NEXT: psllw $8, %xmm0
+; SSE2-SSSE3-NEXT: psraw $8, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
+; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
+; SSE2-SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE2-SSSE3-NEXT: ret{{[l|q]}}
;
; AVX12-LABEL: v8i8:
; AVX12: # BB#0:
; AVX12-NEXT: vpsraw $8, %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX12-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: ret{{[l|q]}}
}
define i8 @v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
; NOTE(review): this span is a splice of SEVERAL diff hunks, apparently
; from more than one function: the AVX1 block is cut off mid-check, AVX2
; lines appear without their AVX2-LABEL header, and the later SSE2/SSSE3/
; AVX12 fragments (pshufd/punpcklqdq, pshufb %xmm1, vextractf128) do not
; match this function's <8 x i32> checks above — they presumably belong to
; other test functions whose 'define' lines were dropped by the chunking.
; '-'/'+' prefixes are unapplied-diff markers, not valid IR/FileCheck
; text. Nothing here should be edited without the original file; verify
; against the upstream test before touching any check line.
-; SSE2-LABEL: v8i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
-; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: pcmpgtd %xmm7, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
-; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: pand %xmm0, %xmm4
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm4
-; SSE2-NEXT: packuswb %xmm4, %xmm4
-; SSE2-NEXT: pmovmskb %xmm4, %eax
-; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSSE3-LABEL: v8i32:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
-; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
-; SSSE3-NEXT: packssdw %xmm1, %xmm0
-; SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
-; SSSE3-NEXT: packssdw %xmm5, %xmm4
-; SSSE3-NEXT: pand %xmm0, %xmm4
-; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = xmm4[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pmovmskb %xmm4, %eax
-; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSSE3-NEXT: ret{{[l|q]}}
+; SSE2-SSSE3-LABEL: v8i32:
+; SSE2-SSSE3: # BB#0:
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
+; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
+; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
+; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
+; SSE2-SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE2-SSSE3-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; NOTE(review): AVX2 lines below lack their AVX2-LABEL header — missing
; context lines from the diff.
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; NOTE(review): the SSE2/SSSE3/AVX12 fragments below appear to be hunks
; from different functions in the original file — presumably verify.
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
-; SSE2-NEXT: packuswb %xmm2, %xmm2
+; SSE2-NEXT: packsswb %xmm0, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %eax
; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-NEXT: ret{{[l|q]}}
; SSSE3-NEXT: pshufb %xmm1, %xmm6
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; SSSE3-NEXT: pand %xmm2, %xmm6
-; SSSE3-NEXT: pshufb {{.*#+}} xmm6 = xmm6[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: packsswb %xmm0, %xmm6
; SSSE3-NEXT: pmovmskb %xmm6, %eax
; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSSE3-NEXT: ret{{[l|q]}}
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
}
define i8 @v8i32(<8 x i32> %a, <8 x i32> %b) {
; NOTE(review): this 'define i8 @v8i32' duplicates the symbol name of the
; four-argument @v8i32 above — two definitions of the same global name are
; invalid in a single LLVM module, so these hunks presumably come from two
; DIFFERENT test files that were spliced into this chunk; confirm before
; merging. '-'/'+' prefixes are unapplied-diff markers. The AVX1 check
; block and the function's IR body are truncated here.
-; SSE2-LABEL: v8i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
-; SSE2-NEXT: packsswb %xmm1, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: packuswb %xmm0, %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: v8i32:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
-; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
-; SSSE3-NEXT: packsswb %xmm1, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pmovmskb %xmm0, %eax
-; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSSE3-NEXT: retq
+; SSE2-SSSE3-LABEL: v8i32:
+; SSE2-SSSE3: # BB#0:
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
+; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
+; SSE2-SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: v8i32:
; AVX1: # BB#0:
}
define i8 @v8f32(<8 x float> %a, <8 x float> %b) {
-; SSE2-LABEL: v8f32:
-; SSE2: # BB#0:
-; SSE2-NEXT: cmpltps %xmm1, %xmm3
-; SSE2-NEXT: cmpltps %xmm0, %xmm2
-; SSE2-NEXT: packsswb %xmm3, %xmm2
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
-; SSE2-NEXT: packuswb %xmm2, %xmm2
-; SSE2-NEXT: pmovmskb %xmm2, %eax
-; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: v8f32:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: cmpltps %xmm1, %xmm3
-; SSSE3-NEXT: cmpltps %xmm0, %xmm2
-; SSSE3-NEXT: packsswb %xmm3, %xmm2
-; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pmovmskb %xmm2, %eax
-; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSSE3-NEXT: retq
+; SSE2-SSSE3-LABEL: v8f32:
+; SSE2-SSSE3: # BB#0:
+; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
+; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: packsswb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
+; SSE2-SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v8f32:
; AVX12: # BB#0:
; SSE-NEXT: pcmpgtq %xmm4, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm2, %xmm0
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
; SSE-NEXT: cmpltpd %xmm0, %xmm4
; SSE-NEXT: packsswb %xmm5, %xmm4
; SSE-NEXT: packsswb %xmm6, %xmm4
-; SSE-NEXT: pshufb {{.*#+}} xmm4 = xmm4[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE-NEXT: packsswb %xmm0, %xmm4
; SSE-NEXT: pmovmskb %xmm4, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq