return 2;
}
+/// Expand a signed division of \p N by a constant \p Divisor that is a power
+/// of two (or the negation of one) into compare + add + select + arithmetic
+/// shift, followed by a negation when the divisor is negative.
+///
+/// \returns SDValue(N, 0) when integer division is reported as cheap (the
+/// SDIV is kept), an empty SDValue when this expansion is declined (no CMOV,
+/// unsupported value type, or a divisor of 2 / -2), and the replacement value
+/// otherwise. Intermediate nodes built here are appended to \p Created.
+SDValue
+X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                 SelectionDAG &DAG,
+                                 SmallVectorImpl<SDNode *> &Created) const {
+  EVT VT = N->getValueType(0);
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(VT, Attr))
+    return SDValue(N, 0); // Lower SDIV as SDIV
+
+  assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
+         "Unexpected divisor!");
+
+  // Only perform this transform if CMOV is supported otherwise the select
+  // below will become a branch.
+  if (!Subtarget.hasCMov())
+    return SDValue();
+
+  // fold (sdiv X, pow2)
+  // FIXME: Support i8/i16.
+  if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
+    return SDValue();
+
+  unsigned Lg2 = Divisor.countTrailingZeros();
+
+  // If the divisor is 2 or -2, the default expansion is better.
+  if (Lg2 == 1)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  // Build (Pow2 - 1) as an APInt sized to VT rather than via a 64-bit shift,
+  // so the constant always has exactly the width of the value type.
+  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+
+  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+  SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(CMov.getNode());
+
+  // Divide by pow2. The shift amount is an i8 constant: that is the X86
+  // scalar shift-amount type, and an i64 constant would be an illegal type
+  // on 32-bit targets, where this expansion still runs for i32.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
/// Result of 'and' is compared against zero. Change to a BT node if possible.
/// Returns the BT node and the condition code needed to use it.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
/// Reassociate floating point divisions into multiply by reciprocal.
unsigned combineRepeatedFPDivisors() const override;
+
/// Build a replacement for the signed division node \p N by the constant
/// \p Divisor, which must be a power of two or the negation of one.
/// Nodes created along the way are appended to \p Created. The result may
/// be \p N itself (keep the SDIV) or an empty SDValue (expansion declined).
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
};
namespace X86 {
; CHECK-LABEL: combine_i32_sdiv_pow2:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: sarl $31, %eax
-; CHECK-NEXT: shrl $28, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: leal 15(%rdi), %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %eax
; CHECK-NEXT: sarl $4, %eax
; CHECK-NEXT: retq
%1 = sdiv i32 %x, 16
; CHECK-LABEL: combine_i32_sdiv_negpow2:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: sarl $31, %eax
-; CHECK-NEXT: shrl $24, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: leal 255(%rdi), %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %eax
; CHECK-NEXT: sarl $8, %eax
; CHECK-NEXT: negl %eax
; CHECK-NEXT: retq
define i64 @combine_i64_sdiv_pow2(i64 %x) {
; CHECK-LABEL: combine_i64_sdiv_pow2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: sarq $63, %rax
-; CHECK-NEXT: shrq $60, %rax
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: leaq 15(%rdi), %rax
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rax
; CHECK-NEXT: sarq $4, %rax
; CHECK-NEXT: retq
%1 = sdiv i64 %x, 16
define i64 @combine_i64_sdiv_negpow2(i64 %x) {
; CHECK-LABEL: combine_i64_sdiv_negpow2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: sarq $63, %rax
-; CHECK-NEXT: shrq $56, %rax
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: leaq 255(%rdi), %rax
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rax
; CHECK-NEXT: sarq $8, %rax
; CHECK-NEXT: negq %rax
; CHECK-NEXT: retq
; CHECK-LABEL: combine_srem_by_minsigned:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: sarl $31, %eax
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: leal 2147483647(%rdi), %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %eax
; CHECK-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: retq
; CHECK-LABEL: combine_srem_pow2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: sarl $31, %ecx
-; CHECK-NEXT: shrl $28, %ecx
-; CHECK-NEXT: addl %edi, %ecx
+; CHECK-NEXT: leal 15(%rax), %ecx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %ecx
; CHECK-NEXT: andl $-16, %ecx
; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
%1 = srem i32 %x, 16
ret i32 %1
; CHECK-LABEL: combine_srem_negpow2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: sarl $31, %ecx
-; CHECK-NEXT: shrl $24, %ecx
-; CHECK-NEXT: addl %edi, %ecx
+; CHECK-NEXT: leal 255(%rax), %ecx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %ecx
; CHECK-NEXT: andl $-256, %ecx
; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
%1 = srem i32 %x, -256
ret i32 %1
; CHECK-LABEL: combine_i64_srem_pow2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: sarq $63, %rcx
-; CHECK-NEXT: shrq $60, %rcx
-; CHECK-NEXT: addq %rdi, %rcx
+; CHECK-NEXT: leaq 15(%rdi), %rcx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rcx
; CHECK-NEXT: andq $-16, %rcx
; CHECK-NEXT: subq %rcx, %rax
; CHECK-NEXT: retq
; CHECK-LABEL: combine_i64_srem_negpow2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: sarq $63, %rcx
-; CHECK-NEXT: shrq $56, %rcx
-; CHECK-NEXT: addq %rdi, %rcx
+; CHECK-NEXT: leaq 255(%rdi), %rcx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rcx
; CHECK-NEXT: andq $-256, %rcx
; CHECK-NEXT: subq %rcx, %rax
; CHECK-NEXT: retq
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: sarl $31, %ecx
-; CHECK-NEXT: shrl $24, %ecx
-; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: leal 255(%eax), %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: cmovnsl %eax, %ecx
; CHECK-NEXT: andl $-256, %ecx
; CHECK-NEXT: subl %ecx, %eax
; CHECK-NEXT: retl
; X86-LABEL: test_srem_pow2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: shrl $28, %edx
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal 15(%ecx), %edx
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: cmovnsl %ecx, %edx
; X86-NEXT: andl $-16, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
;
; X64-LABEL: test_srem_pow2:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: sarl $31, %ecx
-; X64-NEXT: shrl $28, %ecx
-; X64-NEXT: addl %edi, %ecx
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 15(%rdi), %ecx
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: cmovnsl %edi, %ecx
; X64-NEXT: andl $-16, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %ecx, %edi
; X86-LABEL: test_srem_int_min:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal 2147483647(%ecx), %edx
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: cmovnsl %ecx, %edx
; X86-NEXT: andl $-2147483648, %edx # imm = 0x80000000
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl %ecx, %edx
;
; X64-LABEL: test_srem_int_min:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: sarl $31, %ecx
-; X64-NEXT: shrl %ecx
-; X64-NEXT: addl %edi, %ecx
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 2147483647(%rdi), %ecx
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: cmovnsl %edi, %ecx
; X64-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: addl %edi, %ecx