createReplacer(X86::XOR32rr, X86::KXORDrr);
createReplacer(X86::XOR64rr, X86::KXORQrr);
- createReplacer(X86::TEST32rr, X86::KTESTDrr);
- createReplacer(X86::TEST64rr, X86::KTESTQrr);
+ // TODO: KTEST is not a replacement for TEST due to flag differences. Need
+ // to prove only Z flag is used.
+ //createReplacer(X86::TEST32rr, X86::KTESTDrr);
+ //createReplacer(X86::TEST64rr, X86::KTESTQrr);
}
if (STI->hasDQI()) {
createReplacer(X86::SHR8ri, X86::KSHIFTRBri);
createReplacer(X86::SHL8ri, X86::KSHIFTLBri);
- createReplacer(X86::TEST8rr, X86::KTESTBrr);
- createReplacer(X86::TEST16rr, X86::KTESTWrr);
+ // TODO: KTEST is not a replacement for TEST due to flag differences. Need
+ // to prove only Z flag is used.
+ //createReplacer(X86::TEST8rr, X86::KTESTBrr);
+ //createReplacer(X86::TEST16rr, X86::KTESTWrr);
createReplacer(X86::XOR8rr, X86::KXORBrr);
}
return false;
}
-// Emit KTEST instruction for bit vectors on AVX-512
-static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- if (Op.getOpcode() == ISD::BITCAST) {
- auto hasKTEST = [&](MVT VT) {
- unsigned SizeInBits = VT.getSizeInBits();
- return (Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) ||
- (Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64));
- };
- SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType().getSimpleVT();
- if (Op0VT.isVector() && Op0VT.getVectorElementType() == MVT::i1 &&
- hasKTEST(Op0VT))
- return DAG.getNode(X86ISD::KTEST, SDLoc(Op), Op0VT, Op0, Op0);
- }
- return SDValue();
-}
-
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
if (Op.getResNo() != 0 || NeedOF || NeedCF) {
- // Emit KTEST for bit vectors
- if (auto Node = EmitKTEST(Op, DAG, Subtarget))
- return Node;
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
}
if (Opcode == 0) {
- // Emit KTEST for bit vectors
- if (auto Node = EmitKTEST(Op, DAG, Subtarget))
- return Node;
-
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
return Result;
}
+// Try to select this as a KTEST+SETCC if possible.
+static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Only support equality comparisons.
+ if (CC != ISD::SETEQ && CC != ISD::SETNE)
+ return SDValue();
+
+ // Must be a bitcast from vXi1.
+ if (Op0.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ Op0 = Op0.getOperand(0);
+ MVT VT = Op0.getSimpleValueType();
+ if (!(Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) &&
+ !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
+ return SDValue();
+
+ X86::CondCode X86CC;
+ if (isNullConstant(Op1)) {
+ X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+ } else
+ return SDValue();
+
+ SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0);
+ return getSETCC(X86CC, KTEST, dl, DAG);
+}
+
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
return NewSetCC;
}
+ // Try to lower using KTEST.
+ if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget))
+ return NewSetCC;
+
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
%ret = bitcast <8 x i1> %m2 to i8
ret i8 %ret
}
+
+; Make sure we don't emit a ktest for signed comparisons.
+define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
+; KNL-LABEL: ktest_signed:
+; KNL: ## %bb.0:
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: testw %ax, %ax
+; KNL-NEXT: jle LBB63_1
+; KNL-NEXT: ## %bb.2: ## %bb.2
+; KNL-NEXT: popq %rax
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+; KNL-NEXT: LBB63_1: ## %bb.1
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: callq _foo
+; KNL-NEXT: popq %rax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: ktest_signed:
+; SKX: ## %bb.0:
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: testw %ax, %ax
+; SKX-NEXT: jle LBB63_1
+; SKX-NEXT: ## %bb.2: ## %bb.2
+; SKX-NEXT: popq %rax
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+; SKX-NEXT: LBB63_1: ## %bb.1
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: callq _foo
+; SKX-NEXT: popq %rax
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: ktest_signed:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: pushq %rax
+; AVX512BW-NEXT: .cfi_def_cfa_offset 16
+; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: testw %ax, %ax
+; AVX512BW-NEXT: jle LBB63_1
+; AVX512BW-NEXT: ## %bb.2: ## %bb.2
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+; AVX512BW-NEXT: LBB63_1: ## %bb.1
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: callq _foo
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: ktest_signed:
+; AVX512DQ: ## %bb.0:
+; AVX512DQ-NEXT: pushq %rax
+; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: testw %ax, %ax
+; AVX512DQ-NEXT: jle LBB63_1
+; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+; AVX512DQ-NEXT: LBB63_1: ## %bb.1
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: callq _foo
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: retq
+ %a = icmp eq <16 x i32> %x, zeroinitializer
+ %b = icmp eq <16 x i32> %y, zeroinitializer
+ %c = and <16 x i1> %a, %b
+ %d = bitcast <16 x i1> %c to i16
+ %e = icmp sgt i16 %d, 0
+ br i1 %e, label %bb.2, label %bb.1
+bb.1:
+ call void @foo()
+ br label %bb.2
+bb.2:
+ ret void
+}
+declare void @foo()
+
body: |
; CHECK-LABEL: name: test_8bitops
; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: %rdi, %zmm0, %zmm1, %zmm2, %zmm3
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
; CHECK: [[COPY8:%[0-9]+]]:vk8wm = COPY [[COPY7]]
; CHECK: [[VMOVAPDZrrk:%[0-9]+]]:vr512 = VMOVAPDZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
; CHECK: VMOVAPDZmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVAPDZrrk]]
- ; CHECK: KTESTBrr [[KADDBrr]], [[KADDBrr]], implicit-def %eflags
- ; CHECK: JE_1 %bb.1, implicit %eflags
- ; CHECK: JMP_1 %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: bb.2:
%11 = VMOVAPDZrrk %2, killed %10, %1
VMOVAPDZmr %0, 1, %noreg, 0, %noreg, killed %11
- TEST8rr %18, %18, implicit-def %eflags
- JE_1 %bb.1, implicit %eflags
- JMP_1 %bb.2
+ ; FIXME We can't replace TEST with KTEST due to flag differences
+ ; TEST8rr %18, %18, implicit-def %eflags
+ ; JE_1 %bb.1, implicit %eflags
+ ; JMP_1 %bb.2
bb.1:
body: |
; CHECK-LABEL: name: test_16bitops
; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: %rdi, %zmm0, %zmm1, %zmm2, %zmm3
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
; CHECK: [[COPY8:%[0-9]+]]:vk16wm = COPY [[COPY7]]
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
; CHECK: VMOVAPSZmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVAPSZrrk]]
- ; CHECK: KTESTWrr [[KXORWrr]], [[KXORWrr]], implicit-def %eflags
- ; CHECK: JE_1 %bb.1, implicit %eflags
- ; CHECK: JMP_1 %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: bb.2:
%11 = VMOVAPSZrrk %2, killed %10, %1
VMOVAPSZmr %0, 1, %noreg, 0, %noreg, killed %11
- TEST16rr %17, %17, implicit-def %eflags
- JE_1 %bb.1, implicit %eflags
- JMP_1 %bb.2
+ ; FIXME We can't replace TEST with KTEST due to flag differences
+ ; TEST16rr %17, %17, implicit-def %eflags
+ ; JE_1 %bb.1, implicit %eflags
+ ; JMP_1 %bb.2
bb.1:
body: |
; CHECK-LABEL: name: test_32bitops
; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: %rdi, %zmm0, %zmm1
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDrr]]
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVDQU16Zrrk]]
- ; CHECK: KTESTDrr [[KADDDrr]], [[KADDDrr]], implicit-def %eflags
- ; CHECK: JE_1 %bb.1, implicit %eflags
- ; CHECK: JMP_1 %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: bb.2:
%4 = VMOVDQU16Zrrk %2, killed %3, %1
VMOVDQA32Zmr %0, 1, %noreg, 0, %noreg, killed %4
- TEST32rr %13, %13, implicit-def %eflags
- JE_1 %bb.1, implicit %eflags
- JMP_1 %bb.2
+ ; FIXME We can't replace TEST with KTEST due to flag differences
+ ; TEST32rr %13, %13, implicit-def %eflags
+ ; JE_1 %bb.1, implicit %eflags
+ ; JMP_1 %bb.2
bb.1:
body: |
; CHECK-LABEL: name: test_64bitops
; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: %rdi, %zmm0, %zmm1
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVDQU8Zrrk]]
- ; CHECK: KTESTQrr [[KADDQrr]], [[KADDQrr]], implicit-def %eflags
- ; CHECK: JE_1 %bb.1, implicit %eflags
- ; CHECK: JMP_1 %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: bb.2:
%4 = VMOVDQU8Zrrk %2, killed %3, %1
VMOVDQA32Zmr %0, 1, %noreg, 0, %noreg, killed %4
- TEST64rr %13, %13, implicit-def %eflags
- JE_1 %bb.1, implicit %eflags
- JMP_1 %bb.2
+ ; FIXME We can't replace TEST with KTEST due to flag differences
+ ; TEST64rr %13, %13, implicit-def %eflags
+ ; JE_1 %bb.1, implicit %eflags
+ ; JMP_1 %bb.2
bb.1: