From d57faf858c7cd7dbf406051fc822a2a66c4cd120 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 4 Oct 2017 18:00:42 +0000 Subject: [PATCH] [X86][AVX] Improve (i8 bitcast (v8i1 x)) handling for v8i64/v8f64 512-bit vector compare results. AVX1/AVX2 targets were missing a chance to use vmovmskps for v8f32/v8i32 results for bool vector bitcasts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314921 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 +++++------ test/CodeGen/X86/bitcast-setcc-512.ll | 20 ++++++-------------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5df5a21f752..9aae58b5555 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -29505,9 +29505,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, FPCastVT = MVT::v4f32; // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) // sign-extend to a 256-bit operation to avoid truncation. - if (N0->getOpcode() == ISD::SETCC && - N0->getOperand(0)->getValueType(0).is256BitVector() && - Subtarget.hasAVX()) { + if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() && + N0->getOperand(0)->getValueType(0).is256BitVector()) { SExtVT = MVT::v4i64; FPCastVT = MVT::v4f64; } @@ -29519,9 +29518,9 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over // 256-bit because the shuffle is cheaper than sign extending the result of // the compare. 
- if (N0->getOpcode() == ISD::SETCC && - N0->getOperand(0)->getValueType(0).is256BitVector() && - Subtarget.hasAVX()) { + if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() && + (N0->getOperand(0)->getValueType(0).is256BitVector() || + N0->getOperand(0)->getValueType(0).is512BitVector())) { SExtVT = MVT::v8i32; FPCastVT = MVT::v8f32; } diff --git a/test/CodeGen/X86/bitcast-setcc-512.ll b/test/CodeGen/X86/bitcast-setcc-512.ll index 4ab96c3945d..32044a83910 100644 --- a/test/CodeGen/X86/bitcast-setcc-512.ll +++ b/test/CodeGen/X86/bitcast-setcc-512.ll @@ -1074,9 +1074,8 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b) { ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: # kill: %AL %AL %EAX ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1087,10 +1086,7 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b) { ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: # kill: %AL %AL %EAX ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1138,9 +1134,8 @@ define i8 @v8f64(<8 x double> %a, <8 x double> %b) { ; AVX1-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: vinsertf128 $1, %xmm1, 
%ymm0, %ymm0 +; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: # kill: %AL %AL %EAX ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1151,10 +1146,7 @@ define i8 @v8f64(<8 x double> %a, <8 x double> %b) { ; AVX2-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: # kill: %AL %AL %EAX ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq -- 2.40.0