case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_UGT: CC = 6; break;
case CmpInst::FCMP_ORD: CC = 7; break;
- case CmpInst::FCMP_UEQ:
- case CmpInst::FCMP_ONE: CC = 8; break;
+ case CmpInst::FCMP_UEQ: CC = 8; break;
+ case CmpInst::FCMP_ONE: CC = 12; break;
}
return std::make_pair(CC, NeedSwap);
unsigned CC;
bool NeedSwap;
std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
- if (CC > 7)
+ if (CC > 7 && !Subtarget->hasAVX())
return false;
if (NeedSwap)
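/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
/// CMPs. SETUEQ and SETONE map to 8 and 12 respectively, which callers only
/// use directly when AVX is available.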
-static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
- SDValue &Op1) {
+static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
+ SDValue &Op1) {
unsigned SSECC;
bool Swap = false;
case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
- case ISD::SETUEQ:
- case ISD::SETONE: SSECC = 8; break;
+ case ISD::SETUEQ: SSECC = 8; break;
+ case ISD::SETONE: SSECC = 12; break;
}
if (Swap)
std::swap(Op0, Op1);
// In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
// emit two comparisons and a logic op to tie them together, unless AVX is
// available, in which case a single compare with the returned code is used.
- // TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is
- // available.
SDValue Cmp;
unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
- if (SSECC == 8) {
+ if (SSECC >= 8 && !Subtarget.hasAVX()) {
// LLVM predicate is SETUEQ or SETONE.
unsigned CC0, CC1;
unsigned CombineOpc;
(Subtarget.hasSSE1() && VT == MVT::f32)) &&
VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
- int SSECC = translateX86FSETCC(
+ unsigned SSECC = translateX86FSETCC(
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
- if (SSECC != 8) {
- if (Subtarget.hasAVX512()) {
- SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
- CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
- assert(!VT.isVector() && "Not a scalar type?");
- return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
- }
+ if (Subtarget.hasAVX512()) {
+ SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
+ CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
+ assert(!VT.isVector() && "Not a scalar type?");
+ return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
+ }
+ if (SSECC < 8 || Subtarget.hasAVX()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, DL, MVT::i8));
; CHECK-LABEL: test40:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
-; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
-; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
+; CHECK-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x0c]
; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
; CHECK-LABEL: test41:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
-; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
-; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
+; CHECK-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x0c]
; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
; CHECK-LABEL: test42:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
-; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
+; CHECK-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x0c]
; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
; CHECK-LABEL: test43:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
-; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
+; CHECK-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x0c]
; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
; CHECK-LABEL: test44:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
-; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
-; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
+; CHECK-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x0c]
; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
; CHECK-LABEL: test45:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
-; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
-; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
+; CHECK-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x0c]
; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
; CHECK-LABEL: test46:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
-; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
-; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
+; CHECK-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x0c]
; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
; CHECK-LABEL: test47:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
-; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
-; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
+; CHECK-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x0c]
; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
; CHECK-LABEL: mov_test40:
; CHECK: # BB#0:
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} # sched: [3:1.00]
+; CHECK-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; CHECK-NEXT: retq # sched: [7:1.00]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
; CHECK-LABEL: mov_test41:
; CHECK: # BB#0:
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} # sched: [3:1.00]
+; CHECK-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; CHECK-NEXT: retq # sched: [7:1.00]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
; CHECK-LABEL: mov_test42:
; CHECK: # BB#0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} # sched: [3:1.00]
+; CHECK-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; CHECK-NEXT: retq # sched: [7:1.00]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
; CHECK-LABEL: mov_test43:
; CHECK: # BB#0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} # sched: [3:1.00]
+; CHECK-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; CHECK-NEXT: retq # sched: [7:1.00]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
; CHECK-LABEL: mov_test44:
; CHECK: # BB#0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} # sched: [3:1.00]
+; CHECK-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; CHECK-NEXT: retq # sched: [7:1.00]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
; CHECK-LABEL: mov_test45:
; CHECK: # BB#0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} # sched: [3:1.00]
+; CHECK-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; CHECK-NEXT: retq # sched: [7:1.00]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
; CHECK-LABEL: mov_test46:
; CHECK: # BB#0:
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} # sched: [3:1.00]
+; CHECK-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; CHECK-NEXT: retq # sched: [7:1.00]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
; CHECK-LABEL: mov_test47:
; CHECK: # BB#0:
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} # sched: [3:1.00]
+; CHECK-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; CHECK-NEXT: retq # sched: [7:1.00]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
; CHECK-LABEL: test_256_25:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
-; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
-; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
+; CHECK-NEXT: vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
; CHECK-LABEL: test_256_26:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
-; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
-; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
+; CHECK-NEXT: vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
; CHECK-LABEL: test_256_27:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
-; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
-; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
+; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
; CHECK-LABEL: test_256_28:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
-; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
-; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
+; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
; AVX-LABEL: commute_cmpps_ueq:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %xmm1
-; AVX-NEXT: vcmpeqps %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpunordps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vorps %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vcmpeq_uqps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_ueq:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
-; AVX512-NEXT: vcmpeqps %xmm0, %xmm1, %k0
-; AVX512-NEXT: vcmpunordps %xmm0, %xmm1, %k1
-; AVX512-NEXT: korw %k0, %k1, %k1
+; AVX512-NEXT: vcmpeq_uqps %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
; AVX-LABEL: commute_cmpps_one:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %xmm1
-; AVX-NEXT: vcmpneqps %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpordps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vcmpneq_oqps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_one:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
-; AVX512-NEXT: vcmpordps %xmm0, %xmm1, %k1
-; AVX512-NEXT: vcmpneqps %xmm0, %xmm1, %k1 {%k1}
+; AVX512-NEXT: vcmpneq_oqps %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
; AVX-LABEL: commute_cmpps_ueq_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %ymm1
-; AVX-NEXT: vcmpeqps %ymm0, %ymm1, %ymm2
-; AVX-NEXT: vcmpunordps %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vorps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vcmpeq_uqps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_ueq_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
-; AVX512-NEXT: vcmpeqps %ymm0, %ymm1, %k0
-; AVX512-NEXT: vcmpunordps %ymm0, %ymm1, %k1
-; AVX512-NEXT: korw %k0, %k1, %k1
+; AVX512-NEXT: vcmpeq_uqps %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
; AVX-LABEL: commute_cmpps_one_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %ymm1
-; AVX-NEXT: vcmpneqps %ymm0, %ymm1, %ymm2
-; AVX-NEXT: vcmpordps %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vcmpneq_oqps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_one_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
-; AVX512-NEXT: vcmpordps %ymm0, %ymm1, %k1
-; AVX512-NEXT: vcmpneqps %ymm0, %ymm1, %k1 {%k1}
+; AVX512-NEXT: vcmpneq_oqps %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
; AVX-LABEL: commute_cmppd_ueq:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %xmm1
-; AVX-NEXT: vcmpeqpd %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpunordpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vorpd %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vcmpeq_uqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_ueq:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
-; AVX512-NEXT: vcmpeqpd %xmm0, %xmm1, %k0
-; AVX512-NEXT: vcmpunordpd %xmm0, %xmm1, %k1
-; AVX512-NEXT: korw %k0, %k1, %k1
+; AVX512-NEXT: vcmpeq_uqpd %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
; AVX-LABEL: commute_cmppd_one:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %xmm1
-; AVX-NEXT: vcmpneqpd %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpordpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vcmpneq_oqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_one:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
-; AVX512-NEXT: vcmpordpd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vcmpneqpd %xmm0, %xmm1, %k1 {%k1}
+; AVX512-NEXT: vcmpneq_oqpd %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
; AVX-LABEL: commute_cmppd_ueq_ymmm:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %ymm1
-; AVX-NEXT: vcmpeqpd %ymm0, %ymm1, %ymm2
-; AVX-NEXT: vcmpunordpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vorpd %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vcmpeq_uqpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_ueq_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
-; AVX512-NEXT: vcmpeqpd %ymm0, %ymm1, %k0
-; AVX512-NEXT: vcmpunordpd %ymm0, %ymm1, %k1
-; AVX512-NEXT: korw %k0, %k1, %k1
+; AVX512-NEXT: vcmpeq_uqpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
; AVX-LABEL: commute_cmppd_one_ymmm:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %ymm1
-; AVX-NEXT: vcmpneqpd %ymm0, %ymm1, %ymm2
-; AVX-NEXT: vcmpordpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vandpd %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vcmpneq_oqpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_one_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
-; AVX512-NEXT: vcmpordpd %ymm0, %ymm1, %k1
-; AVX512-NEXT: vcmpneqpd %ymm0, %ymm1, %k1 {%k1}
+; AVX512-NEXT: vcmpneq_oqpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
;
; AVX-LABEL: select_fcmp_one_f32:
; AVX: ## BB#0:
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: jne LBB0_2
-; AVX-NEXT: ## BB#1:
-; AVX-NEXT: vmovaps %xmm3, %xmm2
-; AVX-NEXT: LBB0_2:
-; AVX-NEXT: vmovaps %xmm2, %xmm0
+; AVX-NEXT: vcmpneq_oqss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; AVX-NEXT: retq
%1 = fcmp one float %a, %b
%2 = select i1 %1, float %c, float %d
;
; AVX-LABEL: select_fcmp_one_f64:
; AVX: ## BB#0:
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: jne LBB1_2
-; AVX-NEXT: ## BB#1:
-; AVX-NEXT: vmovaps %xmm3, %xmm2
-; AVX-NEXT: LBB1_2:
-; AVX-NEXT: vmovaps %xmm2, %xmm0
+; AVX-NEXT: vcmpneq_oqsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
; AVX-NEXT: retq
%1 = fcmp one double %a, %b
%2 = select i1 %1, double %c, double %d