From: Juergen Ributzka
Date: Mon, 23 Jun 2014 21:55:40 +0000 (+0000)
Subject: [FastISel][X86] Add support for floating-point select.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d0976a3d200016d49448d0b08234220a7f07eb3d;p=llvm

[FastISel][X86] Add support for floating-point select.

This extends the select lowering to support floating-point selects. The
lowering depends on SSE instructions and that the condition comes from a
floating-point compare. Under these conditions it is possible to emit an
optimized instruction sequence that doesn't require any branches to simulate
the select.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211544 91177308-0d34-0410-b5e6-96231b3b80d8
---

Review note: this copy differs from the change as originally recorded here in
three ways. (1) X86FastEmitSSESelect now bails out when the *registers* of the
compare operands (CmpLHSReg/CmpRHSReg) could not be materialized; the original
tested the Value pointers CmpLHS/CmpRHS, which are never null at that point.
(2) Template arguments that were missing from the text (std::pair<...>,
dyn_cast<...>) have been restored. (3) The opcode lookup table is now const.
Hunk line counts are unchanged.

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3c9acba5cbe..af9eaf32a33 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -113,6 +113,8 @@ private:
 
   bool X86FastEmitCMoveSelect(const Instruction *I);
 
+  bool X86FastEmitSSESelect(const Instruction *I);
+
   bool X86SelectSelect(const Instruction *I);
 
   bool X86SelectTrunc(const Instruction *I);
@@ -235,6 +237,41 @@ getX86ConditonCode(CmpInst::Predicate Predicate) {
   return std::make_pair(CC, NeedSwap);
 }
 
+static std::pair<unsigned, bool>
+getX86SSECondtionCode(CmpInst::Predicate Predicate) {
+  unsigned CC;
+  bool NeedSwap = false;
+
+  // SSE Condition code mapping:
+  //  0 - EQ
+  //  1 - LT
+  //  2 - LE
+  //  3 - UNORD
+  //  4 - NEQ
+  //  5 - NLT
+  //  6 - NLE
+  //  7 - ORD
+  switch (Predicate) {
+  default: llvm_unreachable("Unexpected predicate");
+  case CmpInst::FCMP_OEQ: CC = 0;          break;
+  case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
+  case CmpInst::FCMP_OLT: CC = 1;          break;
+  case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
+  case CmpInst::FCMP_OLE: CC = 2;          break;
+  case CmpInst::FCMP_UNO: CC = 3;          break;
+  case CmpInst::FCMP_UNE: CC = 4;          break;
+  case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
+  case CmpInst::FCMP_UGE: CC = 5;          break;
+  case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
+  case CmpInst::FCMP_UGT: CC = 6;          break;
+  case CmpInst::FCMP_ORD: CC = 7;          break;
+  case CmpInst::FCMP_UEQ:
+  case CmpInst::FCMP_ONE: CC = 8;          break;
+  }
+
+  return std::make_pair(CC, NeedSwap);
+}
+
 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
   EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
   if (evt == MVT::Other || !evt.isSimple())
@@ -1728,6 +1765,93 @@ bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) {
   return true;
 }
 
+/// \brief Emit SSE instructions to lower the select.
+///
+/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
+/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
+/// SSE instructions are available.
+bool X86FastISel::X86FastEmitSSESelect(const Instruction *I) {
+  MVT RetVT;
+  if (!isTypeLegal(I->getType(), RetVT))
+    return false;
+
+  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
+  if (!CI)
+    return false;
+
+  if (I->getType() != CI->getOperand(0)->getType() ||
+      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
+        (Subtarget->hasSSE2() && RetVT == MVT::f64)    ))
+    return false;
+
+  const Value *CmpLHS = CI->getOperand(0);
+  const Value *CmpRHS = CI->getOperand(1);
+  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
+  // We don't have to materialize a zero constant for this case and can just use
+  // %x again on the RHS.
+  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
+    if (CmpRHSC && CmpRHSC->isNullValue())
+      CmpRHS = CmpLHS;
+  }
+
+  unsigned CC;
+  bool NeedSwap;
+  std::tie(CC, NeedSwap) = getX86SSECondtionCode(Predicate);
+  if (CC > 7)
+    return false;
+
+  if (NeedSwap)
+    std::swap(CmpLHS, CmpRHS);
+
+  static const unsigned OpcTable[2][2][4] = {
+    { { X86::CMPSSrr,  X86::FsANDPSrr,  X86::FsANDNPSrr,  X86::FsORPSrr  },
+      { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr }  },
+    { { X86::CMPSDrr,  X86::FsANDPDrr,  X86::FsANDNPDrr,  X86::FsORPDrr  },
+      { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr }  }
+  };
+
+  bool HasAVX = Subtarget->hasAVX();
+  const unsigned *Opc = nullptr;
+  switch (RetVT.SimpleTy) {
+  default: return false;
+  case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
+  case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+  }
+
+  const Value *LHS = I->getOperand(1);
+  const Value *RHS = I->getOperand(2);
+
+  unsigned LHSReg = getRegForValue(LHS);
+  bool LHSIsKill = hasTrivialKill(LHS);
+
+  unsigned RHSReg = getRegForValue(RHS);
+  bool RHSIsKill = hasTrivialKill(RHS);
+
+  unsigned CmpLHSReg = getRegForValue(CmpLHS);
+  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
+
+  unsigned CmpRHSReg = getRegForValue(CmpRHS);
+  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
+
+  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
+    return false;
+
+  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+  unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+                                     CmpRHSReg, CmpRHSIsKill, CC);
+  unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+                                    LHSReg, LHSIsKill);
+  unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+                                     RHSReg, RHSIsKill);
+  unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+                                       AndReg, /*IsKill=*/true);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
 bool X86FastISel::X86SelectSelect(const Instruction *I) {
MVT RetVT; if (!isTypeLegal(I->getType(), RetVT)) @@ -1762,6 +1886,10 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { if (X86FastEmitCMoveSelect(I)) return true; + // Try to use a sequence of SSE instructions to simulate a conditional move. + if (X86FastEmitSSESelect(I)) + return true; + return false; } diff --git a/test/CodeGen/X86/fast-isel-select-sse.ll b/test/CodeGen/X86/fast-isel-select-sse.ll new file mode 100644 index 00000000000..3c03a0312f5 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-select-sse.ll @@ -0,0 +1,391 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX + +; Test all cmp predicates that can be used with SSE. + +define float @select_fcmp_oeq_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_oeq_f32 +; CHECK: cmpeqss %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm0, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: orps %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_oeq_f32 +; AVX: vcmpeqss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp oeq float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_oeq_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_oeq_f64 +; CHECK: cmpeqsd %xmm1, %xmm0 +; CHECK-NEXT: andpd %xmm0, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm0 +; CHECK-NEXT: orpd %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_oeq_f64 +; AVX: vcmpeqsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp oeq double %a, %b + %2 = select i1 
%1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_ogt_f32 +; CHECK: cmpltss %xmm0, %xmm1 +; CHECK-NEXT: andps %xmm1, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm1 +; CHECK-NEXT: orps %xmm2, %xmm1 +; AVX-LABEL: select_fcmp_ogt_f32 +; AVX: vcmpltss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp ogt float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_ogt_f64 +; CHECK: cmpltsd %xmm0, %xmm1 +; CHECK-NEXT: andpd %xmm1, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm1 +; CHECK-NEXT: orpd %xmm2, %xmm1 +; AVX-LABEL: select_fcmp_ogt_f64 +; AVX: vcmpltsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp ogt double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_oge_f32 +; CHECK: cmpless %xmm0, %xmm1 +; CHECK-NEXT: andps %xmm1, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm1 +; CHECK-NEXT: orps %xmm2, %xmm1 +; AVX-LABEL: select_fcmp_oge_f32 +; AVX: vcmpless %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp oge float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_oge_f64 +; CHECK: cmplesd %xmm0, %xmm1 +; CHECK-NEXT: andpd %xmm1, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm1 +; CHECK-NEXT: orpd %xmm2, %xmm1 +; AVX-LABEL: select_fcmp_oge_f64 +; AVX: vcmplesd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; 
AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp oge double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_olt_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_olt_f32 +; CHECK: cmpltss %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm0, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: orps %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_olt_f32 +; AVX: vcmpltss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp olt float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_olt_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_olt_f64 +; CHECK: cmpltsd %xmm1, %xmm0 +; CHECK-NEXT: andpd %xmm0, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm0 +; CHECK-NEXT: orpd %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_olt_f64 +; AVX: vcmpltsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp olt double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_ole_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_ole_f32 +; CHECK: cmpless %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm0, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: orps %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_ole_f32 +; AVX: vcmpless %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp ole float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_ole_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_ole_f64 +; CHECK: cmplesd %xmm1, %xmm0 +; CHECK-NEXT: andpd %xmm0, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm0 +; CHECK-NEXT: orpd 
%xmm2, %xmm0 +; AVX-LABEL: select_fcmp_ole_f64 +; AVX: vcmplesd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp ole double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_ord_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_ord_f32 +; CHECK: cmpordss %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm0, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: orps %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_ord_f32 +; AVX: vcmpordss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp ord float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_ord_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_ord_f64 +; CHECK: cmpordsd %xmm1, %xmm0 +; CHECK-NEXT: andpd %xmm0, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm0 +; CHECK-NEXT: orpd %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_ord_f64 +; AVX: vcmpordsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp ord double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_uno_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_uno_f32 +; CHECK: cmpunordss %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm0, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: orps %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_uno_f32 +; AVX: vcmpunordss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp uno float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_uno_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: 
select_fcmp_uno_f64 +; CHECK: cmpunordsd %xmm1, %xmm0 +; CHECK-NEXT: andpd %xmm0, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm0 +; CHECK-NEXT: orpd %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_uno_f64 +; AVX: vcmpunordsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp uno double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_ugt_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_ugt_f32 +; CHECK: cmpnless %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm0, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: orps %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_ugt_f32 +; AVX: vcmpnless %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp ugt float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_ugt_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_ugt_f64 +; CHECK: cmpnlesd %xmm1, %xmm0 +; CHECK-NEXT: andpd %xmm0, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm0 +; CHECK-NEXT: orpd %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_ugt_f64 +; AVX: vcmpnlesd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp ugt double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_uge_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_uge_f32 +; CHECK: cmpnltss %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm0, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: orps %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_uge_f32 +; AVX: vcmpnltss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp uge float %a, %b + %2 = select i1 %1, 
float %c, float %d + ret float %2 +} + +define double @select_fcmp_uge_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_uge_f64 +; CHECK: cmpnltsd %xmm1, %xmm0 +; CHECK-NEXT: andpd %xmm0, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm0 +; CHECK-NEXT: orpd %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_uge_f64 +; AVX: vcmpnltsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp uge double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_ult_f32 +; CHECK: cmpnless %xmm0, %xmm1 +; CHECK-NEXT: andps %xmm1, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm1 +; CHECK-NEXT: orps %xmm2, %xmm1 +; AVX-LABEL: select_fcmp_ult_f32 +; AVX: vcmpnless %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp ult float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_ult_f64 +; CHECK: cmpnlesd %xmm0, %xmm1 +; CHECK-NEXT: andpd %xmm1, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm1 +; CHECK-NEXT: orpd %xmm2, %xmm1 +; AVX-LABEL: select_fcmp_ult_f64 +; AVX: vcmpnlesd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp ult double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_ule_f32 +; CHECK: cmpnltss %xmm0, %xmm1 +; CHECK-NEXT: andps %xmm1, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm1 +; CHECK-NEXT: orps %xmm2, %xmm1 +; AVX-LABEL: select_fcmp_ule_f32 +; AVX: vcmpnltss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp ule float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_ule_f64 +; CHECK: cmpnltsd %xmm0, %xmm1 +; CHECK-NEXT: andpd %xmm1, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm1 +; CHECK-NEXT: orpd %xmm2, %xmm1 +; AVX-LABEL: select_fcmp_ule_f64 +; AVX: vcmpnltsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp ule double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} + +define float @select_fcmp_une_f32(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: select_fcmp_une_f32 +; CHECK: cmpneqss %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm0, %xmm2 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: orps %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_une_f32 +; AVX: vcmpneqss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 + %1 = fcmp une float %a, %b + %2 = select i1 %1, float %c, float %d + ret float %2 +} + +define double @select_fcmp_une_f64(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: select_fcmp_une_f64 +; CHECK: cmpneqsd %xmm1, %xmm0 +; CHECK-NEXT: andpd %xmm0, %xmm2 +; CHECK-NEXT: andnpd %xmm3, %xmm0 +; CHECK-NEXT: orpd %xmm2, %xmm0 +; AVX-LABEL: select_fcmp_une_f64 +; AVX: vcmpneqsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 + %1 = fcmp une double %a, %b + %2 = select i1 %1, double %c, double %d + ret double %2 +} +