From: Simon Pilgrim Date: Wed, 3 Apr 2019 17:28:34 +0000 (+0000) Subject: [X86][AVX] combineHorizontalPredicateResult - split any/allof v16i16/v32i8 reduction... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1b7bd368ad428d381f2000caaecabeb08067bf0a;p=llvm [X86][AVX] combineHorizontalPredicateResult - split any/allof v16i16/v32i8 reduction on AVX1 Perform the 2 x 128-bit lo/hi OR/AND on the vectors before calling PMOVMSKB on the 128-bit result. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357611 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0ae7b5b254b..9db9d11904c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -34316,6 +34316,14 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, if (DAG.ComputeNumSignBits(Match) != BitWidth) return SDValue(); + SDLoc DL(Extract); + if (MatchSizeInBits == 256 && BitWidth < 32 && !Subtarget.hasInt256()) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(Match, DL); + Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi); + MatchSizeInBits = Match.getValueSizeInBits(); + } + // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB. MVT MaskSrcVT; if (64 == BitWidth || 32 == BitWidth) @@ -34324,7 +34332,6 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, else MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8); - SDLoc DL(Extract); SDValue CmpC; ISD::CondCode CondCode; if (BinOp == ISD::OR) { diff --git a/test/CodeGen/X86/vector-compare-all_of.ll b/test/CodeGen/X86/vector-compare-all_of.ll index 8c450945b4d..a2f7e9dda7a 100644 --- a/test/CodeGen/X86/vector-compare-all_of.ll +++ b/test/CodeGen/X86/vector-compare-all_of.ll @@ -673,12 +673,10 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %ecx ; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl $-1, %ecx +; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF ; AVX1-NEXT: sete %al ; AVX1-NEXT: negl %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax @@ -867,11 +865,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: cmpl $-1, %ecx +; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF ; AVX1-NEXT: sete %al ; AVX1-NEXT: negb %al ; AVX1-NEXT: vzeroupper @@ -1555,11 +1551,9 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: cmpl $-1, %ecx +; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF ; AVX1-NEXT: sete %al ; AVX1-NEXT: negb %al ; AVX1-NEXT: vzeroupper diff --git a/test/CodeGen/X86/vector-compare-any_of.ll b/test/CodeGen/X86/vector-compare-any_of.ll index 2236e953e07..73e476341c0 100644 --- a/test/CodeGen/X86/vector-compare-any_of.ll +++ b/test/CodeGen/X86/vector-compare-any_of.ll @@ -611,13 +611,10 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %ecx -; AVX1-NEXT: vpmovmskb %xmm2, %edx -; AVX1-NEXT: shll $16, %edx -; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: setne %al +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %eax ; AVX1-NEXT: negl %eax +; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -790,12 +787,10 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: setne %al -; AVX1-NEXT: negb %al +; AVX1-NEXT: negl %eax +; AVX1-NEXT: sbbb %al, %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -1488,12 +1483,10 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: setne %al -; AVX1-NEXT: negb %al +; AVX1-NEXT: negl %eax +; AVX1-NEXT: sbbb %al, %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ;