From: Simon Pilgrim Date: Sun, 20 Jan 2019 13:21:43 +0000 (+0000) Subject: [CostModel][X86] Add explicit fcmp costs for pre-SSE42 targets X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=38ece52d643c8cab5e2d8448333fcaee4e405e87;p=llvm [CostModel][X86] Add explicit fcmp costs for pre-SSE42 targets Typical throughputs: cmpss/cmpps = 1cy and cmpsd/cmppd = 2cy before the Core2 era git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351684 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 443007969eb..1d94eed2372 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1686,12 +1686,19 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, }; static const CostTblEntry SSE2CostTbl[] = { + { ISD::SETCC, MVT::v2f64, 2 }, + { ISD::SETCC, MVT::f64, 1 }, { ISD::SETCC, MVT::v2i64, 8 }, { ISD::SETCC, MVT::v4i32, 1 }, { ISD::SETCC, MVT::v8i16, 1 }, { ISD::SETCC, MVT::v16i8, 1 }, }; + static const CostTblEntry SSE1CostTbl[] = { + { ISD::SETCC, MVT::v4f32, 2 }, + { ISD::SETCC, MVT::f32, 1 }, + }; + if (ST->hasBWI()) if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) return LT.first * Entry->Cost; @@ -1716,6 +1723,10 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) return LT.first * Entry->Cost; + if (ST->hasSSE1()) + if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } diff --git a/test/Analysis/CostModel/X86/fcmp.ll b/test/Analysis/CostModel/X86/fcmp.ll index e900dc7f50f..5ec8928b02b 100644 --- a/test/Analysis/CostModel/X86/fcmp.ll +++ b/test/Analysis/CostModel/X86/fcmp.ll @@ -16,54 +16,54 @@ define i32 @cmp_float_oeq(i32 %arg) { ; SSE2-LABEL: 'cmp_float_oeq' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oeq float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp oeq <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp oeq <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp oeq <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp oeq <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp oeq <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp oeq <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp oeq <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp oeq <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp oeq double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp oeq <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp oeq <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp oeq <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp oeq <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp oeq <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp oeq <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp oeq <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp oeq <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_oeq' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oeq float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp oeq <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp oeq <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp oeq <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp oeq <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp oeq <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp oeq <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp oeq <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp oeq <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp oeq double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp oeq <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp oeq <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp oeq <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp oeq <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp oeq <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp oeq <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp oeq <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp oeq <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_oeq' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oeq float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp oeq <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp oeq <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp oeq <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp oeq <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp oeq <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp oeq <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp oeq <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp oeq <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp oeq double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp oeq <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp oeq <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp oeq <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp oeq <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp oeq <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp oeq <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp oeq <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp oeq <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_oeq' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oeq float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp oeq <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp oeq <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp oeq <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp oeq <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp oeq <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp oeq <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp oeq <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp oeq <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp oeq double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp oeq <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp oeq <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp oeq <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp oeq <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp oeq <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp oeq <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp oeq <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp oeq <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_oeq' @@ -136,54 +136,54 @@ define i32 @cmp_float_oeq(i32 %arg) { define i32 @cmp_float_one(i32 %arg) { ; SSE2-LABEL: 'cmp_float_one' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp one <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp one <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp one <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp one <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp one <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp one <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp one <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp one <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp one <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp one <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp one <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp one <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp one <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp one <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_one' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp one <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp one <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp one <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp one <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp one <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp one <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp one <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp one <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp one <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp one <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp one <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp one <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp one <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp one <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_one' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp one <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp one <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp one <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp one <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp one <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp one <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp one <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp one <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp one <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp one <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp one <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp one <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp one <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp one <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_one' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp one <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp one <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp one <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp one <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp one <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp one <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp one <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp one <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp one <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp one <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp one <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp one <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp one <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp one <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_one' @@ -256,54 +256,54 @@ define i32 @cmp_float_one(i32 %arg) { define i32 @cmp_float_ord(i32 %arg) { ; SSE2-LABEL: 'cmp_float_ord' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ord float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ord <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ord <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ord <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ord <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ord <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ord <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ord <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ord <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ord double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ord <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ord <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ord <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ord <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ord <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ord <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ord <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ord <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_ord' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ord float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ord <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ord <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ord <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ord <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ord <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ord <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ord <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ord <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ord double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ord <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ord <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ord <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ord <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ord <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ord <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ord <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ord <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_ord' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ord float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ord <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ord <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ord <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ord <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ord <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ord <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ord <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ord <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ord double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ord <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ord <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ord <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ord <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ord <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ord <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ord <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ord <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_ord' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ord float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ord <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ord <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ord <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ord <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ord <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ord <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ord <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ord <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ord double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ord <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ord <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ord <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ord <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ord <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ord <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ord <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ord <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_ord' @@ -376,54 +376,54 @@ define i32 @cmp_float_ord(i32 %arg) { define i32 @cmp_float_oge(i32 %arg) { ; SSE2-LABEL: 'cmp_float_oge' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oge float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp oge <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp oge <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp oge <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp oge <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp oge <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp oge <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp oge <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp oge <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp oge double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp oge <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp oge <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp oge <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp oge <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp oge <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp oge <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp oge <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp oge <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_oge' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oge float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp oge <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp oge <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp oge <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp oge <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp oge <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp oge <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp oge <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp oge <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp oge double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp oge <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp oge <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp oge <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp oge <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp oge <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp oge <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp oge <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp oge <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_oge' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oge float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp oge <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp oge <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp oge <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp oge <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp oge <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp oge <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp oge <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp oge <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp oge double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp oge <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp oge <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp oge <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp oge <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp oge <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp oge <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp oge <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp oge <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_oge' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oge float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp oge <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp oge <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp oge <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp oge <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp oge <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp oge <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp oge <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp oge <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp oge double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp oge <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp oge <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp oge <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp oge <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp oge <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp oge <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp oge <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp oge <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_oge' @@ -496,54 +496,54 @@ define i32 @cmp_float_oge(i32 %arg) { define i32 @cmp_float_ogt(i32 %arg) { ; SSE2-LABEL: 'cmp_float_ogt' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ogt float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ogt <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ogt <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ogt <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ogt <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ogt <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ogt <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ogt <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ogt <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ogt double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ogt <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ogt <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ogt <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ogt <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ogt <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ogt <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ogt <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ogt <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_ogt' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ogt float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ogt <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ogt <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ogt <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ogt <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ogt <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ogt <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ogt <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ogt <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ogt double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ogt <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ogt <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ogt <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ogt <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ogt <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ogt <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ogt <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ogt <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_ogt' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ogt float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ogt <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ogt <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ogt <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ogt <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ogt <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ogt <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ogt <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ogt <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ogt double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ogt <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ogt <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ogt <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ogt <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ogt <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ogt <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ogt <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ogt <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_ogt' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ogt float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ogt <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ogt <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ogt <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ogt <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ogt <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ogt <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ogt <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ogt <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ogt double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ogt <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ogt <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ogt <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ogt <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ogt <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ogt <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ogt <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ogt <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_ogt' @@ -616,54 +616,54 @@ define i32 @cmp_float_ogt(i32 %arg) { define i32 @cmp_float_ole(i32 %arg) { ; SSE2-LABEL: 'cmp_float_ole' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ole float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ole <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ole <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ole <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ole <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ole <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ole <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ole <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ole <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ole double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ole <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ole <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ole <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ole <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ole <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ole <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ole <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ole <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_ole' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ole float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ole <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ole <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ole <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ole <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ole <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ole <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ole <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ole <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ole double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ole <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ole <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ole <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ole <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ole <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ole <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ole <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ole <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_ole' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ole float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ole <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ole <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ole <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ole <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ole <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ole <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ole <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ole <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ole double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ole <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ole <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ole <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ole <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ole <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ole <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ole <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ole <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_ole' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ole float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ole <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ole <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ole <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ole <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ole <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ole <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ole <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ole <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ole double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ole <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ole <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ole <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ole <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ole <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ole <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ole <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ole <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_ole' @@ -736,54 +736,54 @@ define i32 @cmp_float_ole(i32 %arg) { define i32 @cmp_float_olt(i32 %arg) { ; SSE2-LABEL: 'cmp_float_olt' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp olt float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp olt <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp olt <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp olt <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp olt <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp olt double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp olt <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp olt <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_olt' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp olt float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp olt <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp olt <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp olt <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp olt <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp olt double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp olt <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp olt <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_olt' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp olt float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp olt <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp olt <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp olt <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp olt <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp olt double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp olt <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp olt <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_olt' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp olt float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp olt <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp olt <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp olt <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp olt <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp olt double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp olt <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp olt <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_olt' @@ -856,54 +856,54 @@ define i32 @cmp_float_olt(i32 %arg) { define i32 @cmp_float_ueq(i32 %arg) { ; SSE2-LABEL: 'cmp_float_ueq' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_ueq' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_ueq' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_ueq' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_ueq' @@ -976,54 +976,54 @@ define i32 @cmp_float_ueq(i32 %arg) { define i32 @cmp_float_une(i32 %arg) { ; SSE2-LABEL: 'cmp_float_une' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp une float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp une <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp une <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp une <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp une <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp une <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp une <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp une <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp une <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp une double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp une <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp une <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp une <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp une <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp une <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp une <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp une <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp une <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_une' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp une float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp une <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp une <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp une <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp une <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp une <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp une <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp une <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp une <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp une double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp une <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp une <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp une <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp une <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp une <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp une <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp une <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp une <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_une' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp une float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp une <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp une <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp une <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp une <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp une <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp une <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp une <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp une <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp une double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp une <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp une <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp une <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp une <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp une <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp une <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp une <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp une <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_une' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp une float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp une <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp une <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp une <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp une <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp une <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp une <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp une <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp une <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp une double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp une <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp une <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp une <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp une <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp une <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp une <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp une <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp une <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_une' @@ -1096,54 +1096,54 @@ define i32 @cmp_float_une(i32 %arg) { define i32 @cmp_float_uno(i32 %arg) { ; SSE2-LABEL: 'cmp_float_uno' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp uno float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp uno <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp uno <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp uno <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp uno <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp uno <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp uno <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp uno <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp uno <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp uno double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp uno <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp uno <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp uno <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp uno <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp uno <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp uno <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp uno <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp uno <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_uno' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp uno float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp uno <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp uno <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp uno <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp uno <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp uno <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp uno <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp uno <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp uno <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp uno double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp uno <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp uno <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp uno <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp uno <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp uno <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp uno <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp uno <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp uno <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_uno' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp uno float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp uno <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp uno <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp uno <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp uno <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp uno <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp uno <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp uno <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp uno <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp uno double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp uno <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp uno <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp uno <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp uno <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp uno <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp uno <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp uno <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp uno <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_uno' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp uno float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp uno <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp uno <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp uno <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp uno <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp uno <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp uno <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp uno <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp uno <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp uno double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp uno <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp uno <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp uno <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp uno <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp uno <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp uno <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp uno <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp uno <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_uno' @@ -1216,54 +1216,54 @@ define i32 @cmp_float_uno(i32 %arg) { define i32 @cmp_float_uge(i32 %arg) { ; SSE2-LABEL: 'cmp_float_uge' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp uge float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp uge <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp uge <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp uge <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp uge <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp uge <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp uge <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp uge <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp uge <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp uge double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp uge <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp uge <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp uge <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp uge <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp uge <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp uge <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp uge <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp uge <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_uge' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp uge float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp uge <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp uge <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp uge <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp uge <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp uge <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp uge <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp uge <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp uge <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp uge double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp uge <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp uge <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp uge <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp uge <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp uge <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp uge <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp uge <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp uge <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_uge' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp uge float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp uge <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp uge <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp uge <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp uge <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp uge <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp uge <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp uge <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp uge <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp uge double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp uge <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp uge <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp uge <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp uge <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp uge <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp uge <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp uge <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp uge <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_uge' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp uge float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp uge <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp uge <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp uge <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp uge <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp uge <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp uge <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp uge <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp uge <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp uge double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp uge <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp uge <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp uge <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp uge <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp uge <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp uge <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp uge <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp uge <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_uge' @@ -1336,54 +1336,54 @@ define i32 @cmp_float_uge(i32 %arg) { define i32 @cmp_float_ugt(i32 %arg) { ; SSE2-LABEL: 'cmp_float_ugt' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ugt float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ugt <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ugt <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ugt <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ugt <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ugt <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ugt <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ugt <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ugt <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ugt double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ugt <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ugt <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ugt <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ugt <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ugt <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ugt <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ugt <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ugt <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_ugt' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ugt float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ugt <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ugt <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ugt <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ugt <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ugt <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ugt <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ugt <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ugt <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ugt double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ugt <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ugt <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ugt <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ugt <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ugt <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ugt <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ugt <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ugt <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_ugt' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ugt float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ugt <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ugt <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ugt <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ugt <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ugt <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ugt <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ugt <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ugt <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ugt double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ugt <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ugt <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ugt <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ugt <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ugt <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ugt <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ugt <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ugt <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_ugt' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ugt float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ugt <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ugt <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ugt <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ugt <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ugt <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ugt <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ugt <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ugt <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ugt double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ugt <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ugt <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ugt <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ugt <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ugt <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ugt <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ugt <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ugt <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_ugt' @@ -1456,54 +1456,54 @@ define i32 @cmp_float_ugt(i32 %arg) { define i32 @cmp_float_ule(i32 %arg) { ; SSE2-LABEL: 'cmp_float_ule' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ule float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ule <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ule <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ule <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ule <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ule <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ule <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ule <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ule <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ule double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ule <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ule <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ule <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ule <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ule <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ule <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ule <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ule <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_ule' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ule float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ule <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ule <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ule <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ule <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ule <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ule <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ule <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ule <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ule double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ule <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ule <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ule <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ule <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ule <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ule <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ule <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ule <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_ule' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ule float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ule <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ule <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ule <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ule <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ule <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ule <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ule <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ule <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ule double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ule <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ule <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ule <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ule <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ule <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ule <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ule <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ule <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_ule' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ule float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ule <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ule <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ule <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ule <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ule <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ule <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ule <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ule <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ule double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ule <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ule <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ule <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ule <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ule <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ule <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ule <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ule <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_ule' @@ -1576,54 +1576,54 @@ define i32 @cmp_float_ule(i32 %arg) { define i32 @cmp_float_ult(i32 %arg) { ; SSE2-LABEL: 'cmp_float_ult' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ult float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ult <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ult <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ult <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ult <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ult <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ult <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ult <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ult <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ult double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ult <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ult <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ult <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ult <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ult <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ult <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ult <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ult <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_ult' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ult float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ult <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ult <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ult <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ult <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ult <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ult <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ult <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ult <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ult double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ult <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ult <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ult <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ult <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ult <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ult <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ult <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ult <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_ult' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ult float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ult <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ult <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ult <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ult <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ult <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ult <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ult <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ult <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ult double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ult <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ult <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ult <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ult <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ult <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ult <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ult <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ult <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_ult' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ult float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ult <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp ult <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp ult <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp ult <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ult <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ult <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ult <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ult <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ult double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ult <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp ult <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp ult <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp ult <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ult <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ult <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ult <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ult <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_ult' @@ -1696,54 +1696,54 @@ define i32 @cmp_float_ult(i32 %arg) { define i32 @cmp_float_false(i32 %arg) { ; SSE2-LABEL: 'cmp_float_false' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp false float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp false <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp false <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp false <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp false <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp false <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp false <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp false <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp false <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp false double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp false <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp false <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp false <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp false <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp false <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp false <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp false <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp false <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_false' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp false float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp false <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp false <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp false <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp false <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp false <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp false <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp false <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp false <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp false double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp false <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp false <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp false <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp false <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp false <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp false <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp false <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp false <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_false' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp false float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp false <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp false <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp false <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp false <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp false <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp false <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp false <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp false <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp false double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp false <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp false <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp false <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp false <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp false <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp false <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp false <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp false <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_false' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp false float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp false <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp false <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp false <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp false <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp false <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp false <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp false <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp false <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp false double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp false <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp false <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp false <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp false <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp false <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp false <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp false <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp false <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_false' @@ -1816,54 +1816,54 @@ define i32 @cmp_float_false(i32 %arg) { define i32 @cmp_float_true(i32 %arg) { ; SSE2-LABEL: 'cmp_float_true' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp true float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp true <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp true <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp true <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp true <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp true <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp true <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp true <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp true <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp true double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp true <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp true <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp true <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp true <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp true <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp true <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp true <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp true <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'cmp_float_true' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp true float undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp true <2 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp true <4 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp true <8 x float> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp true <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp true <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp true <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp true <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp true <16 x float> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp true double undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp true <2 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp true <4 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp true <8 x double> undef, undef -; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp true <16 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp true <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp true <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp true <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp true <16 x double> undef, undef ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'cmp_float_true' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp true float undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp true <2 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp true <4 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp true <8 x float> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp true <16 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp true <2 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp true <4 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp true <8 x float> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp true <16 x float> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp true double undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp true <2 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp true <4 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp true <8 x double> undef, undef -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp true <16 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp true <2 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp true <4 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp true <8 x double> undef, undef +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp true <16 x double> undef, undef ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_true' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp true float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp true <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fcmp true <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fcmp true <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fcmp true <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp true <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp true <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp true <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp true <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp true double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp true <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fcmp true <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fcmp true <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = fcmp true <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp true <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp true <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp true <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp true <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'cmp_float_true' diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index d49d557916c..8432b910d91 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -341,114 +341,33 @@ define i32 @maxi32(i32) { } define float @maxf8(float) { -; SSE-LABEL: @maxf8( -; SSE-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 -; SSE-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 -; SSE-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] -; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 -; SSE-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] -; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] -; SSE-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 -; SSE-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] -; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] -; SSE-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 -; SSE-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] -; SSE-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 -; SSE-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] -; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] -; SSE-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 -; SSE-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] -; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] -; SSE-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 -; SSE-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] -; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] -; SSE-NEXT: ret float [[TMP23]] -; -; AVX-LABEL: @maxf8( -; AVX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef -; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef -; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef -; AVX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef -; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef -; AVX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef -; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef -; AVX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef -; AVX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef -; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] -; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef -; AVX-NEXT: ret float [[TMP16]] -; -; AVX2-LABEL: @maxf8( -; AVX2-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef -; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef -; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef -; AVX2-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef -; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef -; AVX2-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef -; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef -; AVX2-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef -; AVX2-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef -; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef -; AVX2-NEXT: ret float [[TMP16]] -; -; SKX-LABEL: @maxf8( -; SKX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef -; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef -; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef -; SKX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef -; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef -; SKX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef -; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef -; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef -; SKX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef -; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef -; SKX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef -; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] -; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 -; SKX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef -; SKX-NEXT: ret float [[TMP16]] +; CHECK-LABEL: @maxf8( +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; CHECK-NEXT: ret float [[TMP16]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -476,195 +395,52 @@ define float @maxf8(float) { } define float @maxf16(float) { -; SSE-LABEL: @maxf16( -; SSE-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 -; SSE-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 -; SSE-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] -; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 -; SSE-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] -; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] -; SSE-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 -; SSE-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] -; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] -; SSE-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 -; SSE-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] -; SSE-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 -; SSE-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] -; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] -; SSE-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 -; SSE-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] -; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] -; SSE-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 -; SSE-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] -; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] -; SSE-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16 -; SSE-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]] -; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]] -; SSE-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4 -; SSE-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]] -; SSE-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]] -; SSE-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8 -; SSE-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]] -; SSE-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]] -; SSE-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4 -; SSE-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]] -; SSE-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]] -; SSE-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16 -; SSE-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]] -; SSE-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]] -; SSE-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4 -; SSE-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]] -; SSE-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]] -; SSE-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8 -; SSE-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]] -; SSE-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]] -; SSE-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4 -; SSE-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]] -; SSE-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]] -; SSE-NEXT: ret float [[TMP47]] -; -; AVX-LABEL: @maxf16( -; AVX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef -; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef -; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef -; AVX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef -; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef -; AVX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef -; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef -; AVX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef -; AVX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef -; AVX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef -; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef -; AVX-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef -; AVX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef -; AVX-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef -; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef -; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef -; AVX-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef -; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef -; AVX-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef -; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef -; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef -; AVX-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef -; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]] -; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] -; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; AVX-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef -; AVX-NEXT: ret float [[TMP32]] -; -; AVX2-LABEL: @maxf16( -; AVX2-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef -; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef -; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef -; AVX2-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef -; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef -; AVX2-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef -; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef -; AVX2-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef -; AVX2-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef -; AVX2-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef -; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef -; AVX2-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef -; AVX2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef -; AVX2-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef -; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef -; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef -; AVX2-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef -; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef -; AVX2-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef -; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef -; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef -; AVX2-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef -; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]] -; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; AVX2-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef -; AVX2-NEXT: ret float [[TMP32]] -; -; SKX-LABEL: @maxf16( -; SKX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef -; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef -; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef -; SKX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef -; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef -; SKX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef -; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef -; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef -; SKX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef -; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef -; SKX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef -; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef -; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef -; SKX-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef -; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef -; SKX-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef -; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef -; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef -; SKX-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef -; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef -; SKX-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef -; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef -; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef -; SKX-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef -; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]] -; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] -; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; SKX-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef -; SKX-NEXT: ret float [[TMP32]] +; CHECK-LABEL: @maxf16( +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef +; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef +; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef +; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef +; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef +; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef +; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef +; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef +; CHECK-NEXT: ret float [[TMP32]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4