From e279e40c27455d0f29aacc53f349a6e4e3fb55ba Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 26 Jun 2019 12:19:11 +0000 Subject: [PATCH] [X86] X86DAGToDAGISel::matchBitExtract(): pattern a: truncation awareness Summary: Finally tying up loose ends here. The problem is quite simple: If we have pattern `(x >> start) & (1 << nbits) - 1`, and then truncate the result, that truncation will be propagated upwards, into the `and`. And that isn't currently handled. I'm only fixing pattern `a` here, the same fix will be needed for patterns `b`/`c` too. I *think* this isn't missing any extra legality checks, since we only look past truncations. Similary, i don't think we can get any other truncation there other than i64->i32. Reviewers: craig.topper, RKSimon, spatel Reviewed By: craig.topper Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62786 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364417 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 49 ++++++++++++++++++----------- test/CodeGen/X86/extract-bits.ll | 36 ++++++--------------- test/CodeGen/X86/extract-lowbits.ll | 30 ++++-------------- 3 files changed, 47 insertions(+), 68 deletions(-) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index e06d36e3500..537a68910ab 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3150,16 +3150,27 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); }; auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); }; + auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) { + if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) { + assert(V.getSimpleValueType() == MVT::i32 && + V.getOperand(0).getSimpleValueType() == MVT::i64 && + "Expected i64 -> i32 truncation"); + V = V.getOperand(0); + } + return V; + }; + // a) x & ((1 << nbits) + (-1)) - auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool { + auto matchPatternA = [&checkOneUse, peekThroughOneUseTruncation, + &NBits](SDValue Mask) -> bool { // Match `add`. Must only have one use! if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask)) return false; // We should be adding all-ones constant (i.e. subtracting one.) if (!isAllOnesConstant(Mask->getOperand(1))) return false; - // Match `1 << nbits`. Must only have one use! - SDValue M0 = Mask->getOperand(0); + // Match `1 << nbits`. Might be truncated. Must only have one use! + SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0)); if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) return false; if (!isOneConstant(M0->getOperand(0))) @@ -3261,18 +3272,6 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { SDLoc DL(Node); - // If we do *NOT* have BMI2, let's find out if the if the 'X' is *logically* - // shifted (potentially with one-use trunc inbetween), - // and if so look past one-use truncation. - MVT XVT = NVT; - if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE && - X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) { - assert(NVT == MVT::i32 && "Expected target valuetype to be i32"); - X = X.getOperand(0); - XVT = X.getSimpleValueType(); - assert(XVT == MVT::i64 && "Expected truncation from i64"); - } - // Truncate the shift amount. NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits); insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); @@ -3288,18 +3287,31 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { if (Subtarget->hasBMI2()) { // Great, just emit the the BZHI.. - if (XVT != MVT::i32) { + if (NVT != MVT::i32) { // But have to place the bit count into the wide-enough register first. - NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, NBits); + NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits); insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); } - SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits); + SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits); ReplaceNode(Node, Extract.getNode()); SelectCode(Extract.getNode()); return true; } + // Else, if we do *NOT* have BMI2, let's find out if the if the 'X' is + // *logically* shifted (potentially with one-use trunc inbetween), + // and the truncation was the only use of the shift, + // and if so look past one-use truncation. + { + SDValue RealX = peekThroughOneUseTruncation(X); + // FIXME: only if the shift is one-use? + if (RealX != X && RealX.getOpcode() == ISD::SRL) + X = RealX; + } + + MVT XVT = X.getSimpleValueType(); + // Else, emitting BEXTR requires one more step. // The 'control' of BEXTR has the pattern of: // [15...8 bit][ 7...0 bit] location @@ -3313,6 +3325,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { insertDAGNode(*CurDAG, SDValue(Node, 0), Control); // If the 'X' is *logically* shifted, we can fold that shift into 'control'. + // FIXME: only if the shift is one-use? if (X.getOpcode() == ISD::SRL) { SDValue ShiftAmt = X.getOperand(1); X = X.getOperand(0); diff --git a/test/CodeGen/X86/extract-bits.ll b/test/CodeGen/X86/extract-bits.ll index 254e006b1ce..c5c64b06146 100644 --- a/test/CodeGen/X86/extract-bits.ll +++ b/test/CodeGen/X86/extract-bits.ll @@ -1718,25 +1718,17 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; ; X64-BMI1NOTBM-LABEL: bextr64_32_a0: ; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rdi -; X64-BMI1NOTBM-NEXT: movl $1, %eax -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shlq %cl, %rax -; X64-BMI1NOTBM-NEXT: decl %eax -; X64-BMI1NOTBM-NEXT: andl %edi, %eax +; X64-BMI1NOTBM-NEXT: shll $8, %edx +; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax +; X64-BMI1NOTBM-NEXT: orl %edx, %eax +; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax ; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax ; X64-BMI1NOTBM-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bextr64_32_a0: ; X64-BMI1BMI2: # %bb.0: ; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: movl $1, %ecx -; X64-BMI1BMI2-NEXT: shlxq %rdx, %rcx, %rcx -; X64-BMI1BMI2-NEXT: decl %ecx -; X64-BMI1BMI2-NEXT: andl %ecx, %eax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax ; X64-BMI1BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits @@ -2198,25 +2190,17 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; ; X64-BMI1NOTBM-LABEL: bextr64_32_a3: ; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rdi -; X64-BMI1NOTBM-NEXT: movl $1, %eax -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shlq %cl, %rax -; X64-BMI1NOTBM-NEXT: decl %eax -; X64-BMI1NOTBM-NEXT: andl %edi, %eax +; X64-BMI1NOTBM-NEXT: shll $8, %edx +; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax +; X64-BMI1NOTBM-NEXT: orl %edx, %eax +; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax ; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax ; X64-BMI1NOTBM-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bextr64_32_a3: ; X64-BMI1BMI2: # %bb.0: ; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: movl $1, %ecx -; X64-BMI1BMI2-NEXT: shlxq %rdx, %rcx, %rcx -; X64-BMI1BMI2-NEXT: decl %ecx -; X64-BMI1BMI2-NEXT: andl %ecx, %eax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax ; X64-BMI1BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits diff --git a/test/CodeGen/X86/extract-lowbits.ll b/test/CodeGen/X86/extract-lowbits.ll index 7e65c1d5cc9..727b444f15f 100644 --- a/test/CodeGen/X86/extract-lowbits.ll +++ b/test/CodeGen/X86/extract-lowbits.ll @@ -791,22 +791,13 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { ; ; X64-BMI1NOTBM-LABEL: bzhi64_32_a0: ; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movl $1, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shlq %cl, %rax -; X64-BMI1NOTBM-NEXT: decl %eax -; X64-BMI1NOTBM-NEXT: andl %edi, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1NOTBM-NEXT: shll $8, %esi +; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax ; X64-BMI1NOTBM-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi64_32_a0: ; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: movl $1, %eax -; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax -; X64-BMI1BMI2-NEXT: decl %eax -; X64-BMI1BMI2-NEXT: andl %edi, %eax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax ; X64-BMI1BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -1086,22 +1077,13 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { ; ; X64-BMI1NOTBM-LABEL: bzhi64_32_a3: ; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movl $1, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shlq %cl, %rax -; X64-BMI1NOTBM-NEXT: decl %eax -; X64-BMI1NOTBM-NEXT: andl %edi, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1NOTBM-NEXT: shll $8, %esi +; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax ; X64-BMI1NOTBM-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi64_32_a3: ; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: movl $1, %eax -; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax -; X64-BMI1BMI2-NEXT: decl %eax -; X64-BMI1BMI2-NEXT: andl %edi, %eax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax ; X64-BMI1BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, 4294967295 -- 2.40.0