}
}
+ // Sometimes flags can be set either with an AND or with an SRL/SHL
+ // instruction. The SRL/SHL variant should be preferred for masks wider than
+ // this number of bits.
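+ // For example, testing (X & 0xFFFFFFFF00000000) against zero needs a
+ // movabsq to materialize the 64-bit immediate plus a testq, while the
+ // equivalent (X >> 32) == 0 is a single shrq.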
+ const int ShiftToAndMaxMaskWidth = 32;
+ const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE);
+
// NOTICE: In the code below we use ArithOp to hold the arithmetic operation
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
// a TEST instruction later.
- if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() &&
+ if (ZeroCheck && Op->hasOneUse() &&
isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
APInt Mask = ArithOp.getOpcode() == ISD::SRL
? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
: APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
- if (!Mask.isSignedIntN(32)) // Avoid large immediates.
+ if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
break;
Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
DAG.getConstant(Mask, dl, VT));
case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
- // because a TEST instruction will be better.
+ // because a TEST instruction will be better. However, AND should be
+ // preferred if the instruction can be combined into ANDN.
if (!hasNonFlagsUse(Op)) {
SDValue Op0 = ArithOp->getOperand(0);
SDValue Op1 = ArithOp->getOperand(1);
EVT VT = ArithOp.getValueType();
bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64;
+ bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI();
+
+ // If we cannot select an ANDN instruction, check if we can replace
+ // AND+IMM64 with a shift before giving up. This is possible for masks
+ // like 0xFFFFFFFF00000000 or 0x0000FFFFFFFFFFFF when we only care about
+ // the zero flag.
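+ // The shift gives a different value (and different SF/CF/OF) than the AND,
+ // but (X & Mask) == 0 and the shifted value == 0 check exactly the same
+ // bits, so ZF is preserved and EQ/NE comparisons remain correct.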
+ if (!isProperAndn) {
+ if (!ZeroCheck)
+ break;
+
+ assert(!isa<ConstantSDNode>(Op0) && "AND node isn't canonicalized");
+ auto *CN = dyn_cast<ConstantSDNode>(Op1);
+ if (!CN)
+ break;
+
+ const APInt &Mask = CN->getAPIntValue();
+ if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
+ break; // Prefer TEST instruction.
+
+ unsigned BitWidth = Mask.getBitWidth();
+ unsigned LeadingOnes = Mask.countLeadingOnes();
+ unsigned TrailingZeros = Mask.countTrailingZeros();
+
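+ // Mask has the form 11..1100..00 (e.g. 0xFFFFFE0000000000): the AND keeps
+ // exactly the bits that an SRL by TrailingZeros moves down, so both
+ // produce the same ZF.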
+ if (LeadingOnes + TrailingZeros == BitWidth) {
+ assert(TrailingZeros < VT.getSizeInBits() &&
+ "Shift amount should be less than the type width");
+ MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
+ SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy);
+ Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt);
+ break;
+ }
+
+ unsigned LeadingZeros = Mask.countLeadingZeros();
+ unsigned TrailingOnes = Mask.countTrailingOnes();
+
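+ // Mask has the form 00..0011..11 (e.g. 0x0000FFFFFFFFFFFF): the AND keeps
+ // exactly the bits that an SHL by LeadingZeros moves up, so both produce
+ // the same ZF.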
+ if (LeadingZeros + TrailingOnes == BitWidth) {
+ assert(LeadingZeros < VT.getSizeInBits() &&
+ "Shift amount should be less than the type width");
+ MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
+ SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy);
+ Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt);
+ break;
+ }
- // But if we can combine this into an ANDN operation, then create an AND
- // now and allow it to be pattern matched into an ANDN.
- if (!Subtarget.hasBMI() || !isAndn || !isLegalAndnType)
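+ // The mask is not a contiguous run of ones at either end of the value,
+ // so a single shift cannot replace it; fall back to TEST with the
+ // immediate.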
break;
+ }
}
LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: {
- if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
+ if (!NeedTruncation && ZeroCheck) {
if (SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG))
return EFLAGS;
}
; CHECK: # BB#0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT: testq %rcx, %rax
+; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # BB#2:
; CHECK-NEXT: movq %rdi, %rax
; CHECK: # BB#0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT: testq %rcx, %rax
+; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: je .LBB1_1
; CHECK-NEXT: # BB#2:
; CHECK-NEXT: movq %rdi, %rax
; CHECK: # BB#0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT: testq %rcx, %rax
+; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: je .LBB2_1
; CHECK-NEXT: # BB#2:
; CHECK-NEXT: movq %rdi, %rax
; CHECK: setne
; CHECK: testl
; CHECK: setne
-}
\ No newline at end of file
+}
+
+define i32 @test21(i64 %val) {
+ %and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000
+ %cmp = icmp ne i64 %and, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+
+; CHECK-LABEL: test21
+; CHECK: shrq $41, %rdi
+; CHECK-NOT: test
+; CHECK: setne %al
+; CHECK: retq
+}
+
+; AND-to-SHR transformation is enabled for eq/ne condition codes only.
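+; Note that (icmp ult %and, 0) is always false, so no shift is expected in
+; the output here at all.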
+define i32 @test22(i64 %val) {
+ %and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000
+ %cmp = icmp ult i64 %and, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+
+; CHECK-LABEL: test22
+; CHECK-NOT: shrq $41
+; CHECK: retq
+}
+
+define i32 @test23(i64 %val) {
+ %and = and i64 %val, -1048576 ; 0xFFFFFFFFFFF00000
+ %cmp = icmp ne i64 %and, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+
+; CHECK-LABEL: test23
+; CHECK: testq $-1048576, %rdi
+; CHECK: setne %al
+; CHECK: retq
+}
+
+define i32 @test24(i64 %val) {
+ %and = and i64 %val, 281474976710655 ; 0x0000FFFFFFFFFFFF
+ %cmp = icmp ne i64 %and, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+
+; CHECK-LABEL: test24
+; CHECK: shlq $16, %rdi
+; CHECK-NOT: test
+; CHECK: setne %al
+; CHECK: retq
+}