auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
+ auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
+ if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
+ assert(V.getSimpleValueType() == MVT::i32 &&
+ V.getOperand(0).getSimpleValueType() == MVT::i64 &&
+ "Expected i64 -> i32 truncation");
+ V = V.getOperand(0);
+ }
+ return V;
+ };
+
// a) x & ((1 << nbits) + (-1))
- auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+ auto matchPatternA = [&checkOneUse, peekThroughOneUseTruncation,
+ &NBits](SDValue Mask) -> bool {
// Match `add`. Must only have one use!
if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
return false;
// We should be adding all-ones constant (i.e. subtracting one.)
if (!isAllOnesConstant(Mask->getOperand(1)))
return false;
- // Match `1 << nbits`. Must only have one use!
- SDValue M0 = Mask->getOperand(0);
+ // Match `1 << nbits`. Might be truncated. Must only have one use!
+ SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
return false;
if (!isOneConstant(M0->getOperand(0)))
SDLoc DL(Node);
- // If we do *NOT* have BMI2, let's find out if the if the 'X' is *logically*
- // shifted (potentially with one-use trunc inbetween),
- // and if so look past one-use truncation.
- MVT XVT = NVT;
- if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
- X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
- assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
- X = X.getOperand(0);
- XVT = X.getSimpleValueType();
- assert(XVT == MVT::i64 && "Expected truncation from i64");
- }
-
// Truncate the shift amount.
NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
if (Subtarget->hasBMI2()) {
// Great, just emit the the BZHI..
- if (XVT != MVT::i32) {
+ if (NVT != MVT::i32) {
// But have to place the bit count into the wide-enough register first.
- NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, NBits);
+ NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
}
- SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
+ SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
ReplaceNode(Node, Extract.getNode());
SelectCode(Extract.getNode());
return true;
}
+ // Else, if we do *NOT* have BMI2, let's find out if the if the 'X' is
+ // *logically* shifted (potentially with one-use trunc inbetween),
+ // and the truncation was the only use of the shift,
+ // and if so look past one-use truncation.
+ {
+ SDValue RealX = peekThroughOneUseTruncation(X);
+ // FIXME: only if the shift is one-use?
+ if (RealX != X && RealX.getOpcode() == ISD::SRL)
+ X = RealX;
+ }
+
+ MVT XVT = X.getSimpleValueType();
+
// Else, emitting BEXTR requires one more step.
// The 'control' of BEXTR has the pattern of:
// [15...8 bit][ 7...0 bit] location
insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
// If the 'X' is *logically* shifted, we can fold that shift into 'control'.
+ // FIXME: only if the shift is one-use?
if (X.getOpcode() == ISD::SRL) {
SDValue ShiftAmt = X.getOperand(1);
X = X.getOperand(0);
;
; X64-BMI1NOTBM-LABEL: bextr64_32_a0:
; X64-BMI1NOTBM: # %bb.0:
-; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT: shrq %cl, %rdi
-; X64-BMI1NOTBM-NEXT: movl $1, %eax
-; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT: shlq %cl, %rax
-; X64-BMI1NOTBM-NEXT: decl %eax
-; X64-BMI1NOTBM-NEXT: andl %edi, %eax
+; X64-BMI1NOTBM-NEXT: shll $8, %edx
+; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax
+; X64-BMI1NOTBM-NEXT: orl %edx, %eax
+; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax
; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bextr64_32_a0:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT: movl $1, %ecx
-; X64-BMI1BMI2-NEXT: shlxq %rdx, %rcx, %rcx
-; X64-BMI1BMI2-NEXT: decl %ecx
-; X64-BMI1BMI2-NEXT: andl %ecx, %eax
-; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax
; X64-BMI1BMI2-NEXT: retq
%shifted = lshr i64 %val, %numskipbits
%onebit = shl i64 1, %numlowbits
;
; X64-BMI1NOTBM-LABEL: bextr64_32_a3:
; X64-BMI1NOTBM: # %bb.0:
-; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT: shrq %cl, %rdi
-; X64-BMI1NOTBM-NEXT: movl $1, %eax
-; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT: shlq %cl, %rax
-; X64-BMI1NOTBM-NEXT: decl %eax
-; X64-BMI1NOTBM-NEXT: andl %edi, %eax
+; X64-BMI1NOTBM-NEXT: shll $8, %edx
+; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax
+; X64-BMI1NOTBM-NEXT: orl %edx, %eax
+; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax
; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bextr64_32_a3:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT: movl $1, %ecx
-; X64-BMI1BMI2-NEXT: shlxq %rdx, %rcx, %rcx
-; X64-BMI1BMI2-NEXT: decl %ecx
-; X64-BMI1BMI2-NEXT: andl %ecx, %eax
-; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax
; X64-BMI1BMI2-NEXT: retq
%shifted = lshr i64 %val, %numskipbits
%onebit = shl i64 1, %numlowbits
;
; X64-BMI1NOTBM-LABEL: bzhi64_32_a0:
; X64-BMI1NOTBM: # %bb.0:
-; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT: movl $1, %eax
-; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT: shlq %cl, %rax
-; X64-BMI1NOTBM-NEXT: decl %eax
-; X64-BMI1NOTBM-NEXT: andl %edi, %eax
-; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT: shll $8, %esi
+; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_32_a0:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movl $1, %eax
-; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT: decl %eax
-; X64-BMI1BMI2-NEXT: andl %edi, %eax
-; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
%onebit = shl i64 1, %numlowbits
%mask = add nsw i64 %onebit, -1
;
; X64-BMI1NOTBM-LABEL: bzhi64_32_a3:
; X64-BMI1NOTBM: # %bb.0:
-; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT: movl $1, %eax
-; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT: shlq %cl, %rax
-; X64-BMI1NOTBM-NEXT: decl %eax
-; X64-BMI1NOTBM-NEXT: andl %edi, %eax
-; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT: shll $8, %esi
+; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_32_a3:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movl $1, %eax
-; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT: decl %eax
-; X64-BMI1BMI2-NEXT: andl %edi, %eax
-; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
%onebit = shl i64 1, %numlowbits
%mask = add nsw i64 %onebit, 4294967295