From: Simon Pilgrim Date: Sun, 20 Jan 2019 13:55:01 +0000 (+0000) Subject: [CostModel][X86] Add explicit vector select costs X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=21d100aff8ac2ea4879b0eb1c525861c90a2866a;p=llvm [CostModel][X86] Add explicit vector select costs Prior to SSE41 (and sometimes on AVX1), vector select has to be performed as a ((X & C)|(Y & ~C)) bit select. Exposes a couple of issues with the min/max reduction costs (which only go down to SSE42 for some reason). The increase pre-SSE41 selection costs also prevent a couple of tests from firing any longer, so I've either tweaked the target or added AVX tests as well to the existing SSE2 tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351685 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 1d94eed2372..ec6dc72728a 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1653,6 +1653,9 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, static const CostTblEntry AVX512BWCostTbl[] = { { ISD::SETCC, MVT::v32i16, 1 }, { ISD::SETCC, MVT::v64i8, 1 }, + + { ISD::SELECT, MVT::v32i16, 1 }, + { ISD::SELECT, MVT::v64i8, 1 }, }; static const CostTblEntry AVX512CostTbl[] = { @@ -1660,6 +1663,11 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, { ISD::SETCC, MVT::v16i32, 1 }, { ISD::SETCC, MVT::v8f64, 1 }, { ISD::SETCC, MVT::v16f32, 1 }, + + { ISD::SELECT, MVT::v8i64, 1 }, + { ISD::SELECT, MVT::v16i32, 1 }, + { ISD::SELECT, MVT::v8f64, 1 }, + { ISD::SELECT, MVT::v16f32, 1 }, }; static const CostTblEntry AVX2CostTbl[] = { @@ -1667,6 +1675,11 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, { ISD::SETCC, MVT::v8i32, 1 }, { ISD::SETCC, MVT::v16i16, 1 }, { ISD::SETCC, MVT::v32i8, 1 }, + + { ISD::SELECT, MVT::v4i64, 1 }, // pblendvb + { ISD::SELECT, MVT::v8i32, 1 }, // pblendvb + { ISD::SELECT, MVT::v16i16, 1 }, // pblendvb + { ISD::SELECT, MVT::v32i8, 1 }, // pblendvb }; static const CostTblEntry AVX1CostTbl[] = { @@ -1677,6 +1690,13 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, { ISD::SETCC, MVT::v8i32, 4 }, { ISD::SETCC, MVT::v16i16, 4 }, { ISD::SETCC, MVT::v32i8, 4 }, + + { ISD::SELECT, MVT::v4f64, 1 }, // vblendvpd + { ISD::SELECT, MVT::v8f32, 1 }, // vblendvps + { ISD::SELECT, MVT::v4i64, 1 }, // vblendvpd + { ISD::SELECT, MVT::v8i32, 1 }, // vblendvps + { ISD::SELECT, MVT::v16i16, 3 }, // vandps + vandnps + vorps + { ISD::SELECT, MVT::v32i8, 3 }, // vandps + vandnps + vorps }; static const CostTblEntry SSE42CostTbl[] = { @@ -1685,6 +1705,15 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, { ISD::SETCC, MVT::v2i64, 1 }, }; + static const CostTblEntry SSE41CostTbl[] = { + { ISD::SELECT, MVT::v2f64, 1 }, // blendvpd + { ISD::SELECT, MVT::v4f32, 1 }, // blendvps + { ISD::SELECT, MVT::v2i64, 1 }, // pblendvb + { ISD::SELECT, MVT::v4i32, 1 }, // pblendvb + { ISD::SELECT, MVT::v8i16, 1 }, // pblendvb + { ISD::SELECT, MVT::v16i8, 1 }, // pblendvb + }; + static const CostTblEntry SSE2CostTbl[] = { { ISD::SETCC, MVT::v2f64, 2 }, { ISD::SETCC, MVT::f64, 1 }, @@ -1692,11 +1721,19 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, { ISD::SETCC, MVT::v4i32, 1 }, { ISD::SETCC, MVT::v8i16, 1 }, { ISD::SETCC, MVT::v16i8, 1 }, + + { ISD::SELECT, MVT::v2f64, 3 }, // andpd + andnpd + orpd + { ISD::SELECT, MVT::v2i64, 3 }, // pand + pandn + por + { ISD::SELECT, MVT::v4i32, 3 }, // pand + pandn + por + { ISD::SELECT, MVT::v8i16, 3 }, // pand + pandn + por + { ISD::SELECT, MVT::v16i8, 3 }, // pand + pandn + por }; static const CostTblEntry SSE1CostTbl[] = { { ISD::SETCC, MVT::v4f32, 2 }, { ISD::SETCC, MVT::f32, 1 }, + + { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps }; if (ST->hasBWI()) @@ -1719,6 +1756,10 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) return LT.first * Entry->Cost; + if (ST->hasSSE41()) + if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + if (ST->hasSSE2()) if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) return LT.first * Entry->Cost; diff --git a/test/Analysis/CostModel/X86/fshl.ll b/test/Analysis/CostModel/X86/fshl.ll index d5da8e7ae4d..38621c35a88 100644 --- a/test/Analysis/CostModel/X86/fshl.ll +++ b/test/Analysis/CostModel/X86/fshl.ll @@ -22,9 +22,9 @@ target triple = "x86_64-apple-macosx10.8.0" define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) { ; SSSE3-LABEL: 'var_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 %c64) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'var_funnel_i64' @@ -93,9 +93,9 @@ define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64 define void @var_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) { ; SSSE3-LABEL: 'var_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 %c32) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'var_funnel_i32' @@ -164,9 +164,9 @@ define void @var_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3 define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSSE3-LABEL: 'var_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'var_funnel_i16' @@ -179,8 +179,8 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; AVX1-LABEL: 'var_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'var_funnel_i16' @@ -228,15 +228,15 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; BDVER2-LABEL: 'var_funnel_i16' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'var_funnel_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16) @@ -249,9 +249,9 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSSE3-LABEL: 'var_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'var_funnel_i8' @@ -264,8 +264,8 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> % ; AVX1-LABEL: 'var_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'var_funnel_i8' @@ -313,15 +313,15 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> % ; BDVER2-LABEL: 'var_funnel_i8' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'var_funnel_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8) @@ -340,9 +340,9 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i64' @@ -427,14 +427,23 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 } define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) { -; SSE-LABEL: 'splatvar_funnel_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatvar_funnel_i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatvar_funnel_i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer @@ -509,22 +518,31 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 } define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { -; SSE-LABEL: 'splatvar_funnel_i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatvar_funnel_i16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatvar_funnel_i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i16' @@ -586,8 +604,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; BDVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; BDVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; BDVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'splatvar_funnel_i16' @@ -595,8 +613,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer @@ -613,9 +631,9 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i8' @@ -632,8 +650,8 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i8' @@ -695,8 +713,8 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; BDVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; BDVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; BDVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'splatvar_funnel_i8' @@ -704,8 +722,8 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer @@ -724,9 +742,9 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'constant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'constant_funnel_i64' @@ -795,9 +813,9 @@ define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 define void @constant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'constant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'constant_funnel_i32' @@ -866,9 +884,9 @@ define void @constant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'constant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'constant_funnel_i16' @@ -881,8 +899,8 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX1-LABEL: 'constant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'constant_funnel_i16' @@ -930,15 +948,15 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; BDVER2-LABEL: 'constant_funnel_i16' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'constant_funnel_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7) @@ -951,9 +969,9 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'constant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'constant_funnel_i8' @@ -966,8 +984,8 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX1-LABEL: 'constant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'constant_funnel_i8' @@ -1015,15 +1033,15 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; BDVER2-LABEL: 'constant_funnel_i8' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'constant_funnel_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7) @@ -1040,9 +1058,9 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' @@ -1109,12 +1127,19 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 } define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { -; SSE-LABEL: 'splatconstant_funnel_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatconstant_funnel_i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatconstant_funnel_i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1173,18 +1198,25 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 } define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'splatconstant_funnel_i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatconstant_funnel_i16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatconstant_funnel_i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' @@ -1232,15 +1264,15 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; BDVER2-LABEL: 'splatconstant_funnel_i16' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'splatconstant_funnel_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1251,18 +1283,25 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 } define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'splatconstant_funnel_i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatconstant_funnel_i8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatconstant_funnel_i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' @@ -1310,15 +1349,15 @@ define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, < ; BDVER2-LABEL: 'splatconstant_funnel_i8' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'splatconstant_funnel_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) diff --git a/test/Analysis/CostModel/X86/fshr.ll b/test/Analysis/CostModel/X86/fshr.ll index 7450fc73d40..31e409cc9a7 100644 --- a/test/Analysis/CostModel/X86/fshr.ll +++ b/test/Analysis/CostModel/X86/fshr.ll @@ -22,9 +22,9 @@ target triple = "x86_64-apple-macosx10.8.0" define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) { ; SSSE3-LABEL: 'var_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'var_funnel_i64' @@ -93,9 +93,9 @@ define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64 define void @var_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) { ; SSSE3-LABEL: 'var_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'var_funnel_i32' @@ -164,9 +164,9 @@ define void @var_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3 define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSSE3-LABEL: 'var_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'var_funnel_i16' @@ -179,8 +179,8 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; AVX1-LABEL: 'var_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'var_funnel_i16' @@ -228,15 +228,15 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; BDVER2-LABEL: 'var_funnel_i16' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'var_funnel_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) @@ -249,9 +249,9 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSSE3-LABEL: 'var_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'var_funnel_i8' @@ -264,8 +264,8 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> % ; AVX1-LABEL: 'var_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'var_funnel_i8' @@ -313,15 +313,15 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> % ; BDVER2-LABEL: 'var_funnel_i8' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'var_funnel_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) @@ -340,9 +340,9 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i64' @@ -427,14 +427,23 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 } define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) { -; SSE-LABEL: 'splatvar_funnel_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatvar_funnel_i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatvar_funnel_i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer @@ -509,22 +518,31 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 } define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { -; SSE-LABEL: 'splatvar_funnel_i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatvar_funnel_i16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatvar_funnel_i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i16' @@ -586,8 +604,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; BDVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; BDVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; BDVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'splatvar_funnel_i16' @@ -595,8 +613,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer @@ -613,9 +631,9 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i8' @@ -632,8 +650,8 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i8' @@ -695,8 +713,8 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; BDVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; BDVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; BDVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'splatvar_funnel_i8' @@ -704,8 +722,8 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer @@ -724,9 +742,9 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'constant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'constant_funnel_i64' @@ -795,9 +813,9 @@ define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 define void @constant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'constant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'constant_funnel_i32' @@ -866,9 +884,9 @@ define void @constant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'constant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'constant_funnel_i16' @@ -881,8 +899,8 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX1-LABEL: 'constant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'constant_funnel_i16' @@ -930,15 +948,15 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; BDVER2-LABEL: 'constant_funnel_i16' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'constant_funnel_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) @@ -951,9 +969,9 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'constant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'constant_funnel_i8' @@ -966,8 +984,8 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX1-LABEL: 'constant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'constant_funnel_i8' @@ -1015,15 +1033,15 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; BDVER2-LABEL: 'constant_funnel_i8' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'constant_funnel_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) @@ -1040,9 +1058,9 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' @@ -1109,12 +1127,19 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 } define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { -; SSE-LABEL: 'splatconstant_funnel_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatconstant_funnel_i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatconstant_funnel_i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1173,18 +1198,25 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 } define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'splatconstant_funnel_i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatconstant_funnel_i16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatconstant_funnel_i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' @@ -1232,15 +1264,15 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; BDVER2-LABEL: 'splatconstant_funnel_i16' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'splatconstant_funnel_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1251,18 +1283,25 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 } define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'splatconstant_funnel_i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSSE3-LABEL: 'splatconstant_funnel_i8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'splatconstant_funnel_i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' @@ -1310,15 +1349,15 @@ define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, < ; BDVER2-LABEL: 'splatconstant_funnel_i8' ; BDVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; BDVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; BDVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; BDVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'splatconstant_funnel_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) diff --git a/test/Analysis/CostModel/X86/reduce-smax-widen.ll b/test/Analysis/CostModel/X86/reduce-smax-widen.ll index 44b01d3707c..6eba5c354b9 100644 --- a/test/Analysis/CostModel/X86/reduce-smax-widen.ll +++ b/test/Analysis/CostModel/X86/reduce-smax-widen.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,21 +124,21 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -154,9 +154,9 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -206,23 +206,23 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' @@ -240,9 +240,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' diff --git a/test/Analysis/CostModel/X86/reduce-smax.ll b/test/Analysis/CostModel/X86/reduce-smax.ll index b4b9a3ba637..4ea2a704967 100644 --- a/test/Analysis/CostModel/X86/reduce-smax.ll +++ b/test/Analysis/CostModel/X86/reduce-smax.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,21 +124,21 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -154,9 +154,9 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -206,23 +206,23 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' @@ -240,9 +240,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' diff --git a/test/Analysis/CostModel/X86/reduce-smin-widen.ll b/test/Analysis/CostModel/X86/reduce-smin-widen.ll index 18dd54cf026..afcf9b2a182 100644 --- a/test/Analysis/CostModel/X86/reduce-smin-widen.ll +++ b/test/Analysis/CostModel/X86/reduce-smin-widen.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,19 +124,19 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -150,9 +150,9 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -197,23 +197,23 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' @@ -231,9 +231,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' diff --git a/test/Analysis/CostModel/X86/reduce-smin.ll b/test/Analysis/CostModel/X86/reduce-smin.ll index f420ad736ed..0d9adef8566 100644 --- a/test/Analysis/CostModel/X86/reduce-smin.ll +++ b/test/Analysis/CostModel/X86/reduce-smin.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,21 +124,21 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -154,9 +154,9 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -206,23 +206,23 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' @@ -240,9 +240,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' diff --git a/test/Analysis/CostModel/X86/reduce-umax-widen.ll b/test/Analysis/CostModel/X86/reduce-umax-widen.ll index c48cfdf9778..c3ef8fdd546 100644 --- a/test/Analysis/CostModel/X86/reduce-umax-widen.ll +++ b/test/Analysis/CostModel/X86/reduce-umax-widen.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,21 +124,21 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -154,9 +154,9 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -206,23 +206,23 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' @@ -240,9 +240,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' diff --git a/test/Analysis/CostModel/X86/reduce-umax.ll b/test/Analysis/CostModel/X86/reduce-umax.ll index 490b0e469b1..e0a5ab12a39 100644 --- a/test/Analysis/CostModel/X86/reduce-umax.ll +++ b/test/Analysis/CostModel/X86/reduce-umax.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,21 +124,21 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -154,9 +154,9 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -206,23 +206,23 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' @@ -240,9 +240,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' diff --git a/test/Analysis/CostModel/X86/reduce-umin-widen.ll b/test/Analysis/CostModel/X86/reduce-umin-widen.ll index b59e2f3712e..71674ad6f78 100644 --- a/test/Analysis/CostModel/X86/reduce-umin-widen.ll +++ b/test/Analysis/CostModel/X86/reduce-umin-widen.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,21 +124,21 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -154,9 +154,9 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -206,23 +206,23 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' @@ -240,9 +240,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' diff --git a/test/Analysis/CostModel/X86/reduce-umin.ll b/test/Analysis/CostModel/X86/reduce-umin.ll index f2f519c6023..4a082150eb0 100644 --- a/test/Analysis/CostModel/X86/reduce-umin.ll +++ b/test/Analysis/CostModel/X86/reduce-umin.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,21 +124,21 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -154,9 +154,9 @@ define i32 @reduce_i16(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -206,23 +206,23 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' @@ -240,9 +240,9 @@ define i32 @reduce_i8(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' diff --git a/test/Analysis/CostModel/X86/vselect-cost.ll b/test/Analysis/CostModel/X86/vselect-cost.ll index a77d8fa47ca..58294235569 100644 --- a/test/Analysis/CostModel/X86/vselect-cost.ll +++ b/test/Analysis/CostModel/X86/vselect-cost.ll @@ -9,24 +9,43 @@ ; Verify the cost of vector select instructions. define i32 @test_select() { -; SSE-LABEL: 'test_select' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = select i1 undef, i64 undef, i64 undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = select i1 undef, i32 undef, i32 undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = select i1 undef, i16 undef, i16 undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = select <32 x i1> undef, <32 x i16> undef, <32 x i16> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = select i1 undef, i8 undef, i8 undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = select <32 x i1> undef, <32 x i8> undef, <32 x i8> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = select <64 x i1> undef, <64 x i8> undef, <64 x i8> undef -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'test_select' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = select i1 undef, i64 undef, i64 undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = select i1 undef, i32 undef, i32 undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = select i1 undef, i16 undef, i16 undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = select <32 x i1> undef, <32 x i16> undef, <32 x i16> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = select i1 undef, i8 undef, i8 undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = select <32 x i1> undef, <32 x i8> undef, <32 x i8> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = select <64 x i1> undef, <64 x i8> undef, <64 x i8> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'test_select' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = select i1 undef, i64 undef, i64 undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = select i1 undef, i32 undef, i32 undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = select i1 undef, i16 undef, i16 undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = select <32 x i1> undef, <32 x i16> undef, <32 x i16> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = select i1 undef, i8 undef, i8 undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = select <32 x i1> undef, <32 x i8> undef, <32 x i8> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = select <64 x i1> undef, <64 x i8> undef, <64 x i8> undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'test_select' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = select i1 undef, i64 undef, i64 undef @@ -39,12 +58,12 @@ define i32 @test_select() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = select i1 undef, i16 undef, i16 undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = select <32 x i1> undef, <32 x i16> undef, <32 x i16> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = select <32 x i1> undef, <32 x i16> undef, <32 x i16> undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = select i1 undef, i8 undef, i8 undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = select <32 x i1> undef, <32 x i8> undef, <32 x i8> undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = select <64 x i1> undef, <64 x i8> undef, <64 x i8> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = select <32 x i1> undef, <32 x i8> undef, <32 x i8> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = select <64 x i1> undef, <64 x i8> undef, <64 x i8> undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'test_select' @@ -131,45 +150,85 @@ define i32 @test_select() { ; <4 x float>. Integers of the same size should also use those instructions. define <2 x i64> @test_2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: 'test_2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <2 x i1> , <2 x i64> %a, <2 x i64> %b -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %sel +; SSE2-LABEL: 'test_2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sel = select <2 x i1> , <2 x i64> %a, <2 x i64> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %sel +; +; SSE41-LABEL: 'test_2i64' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <2 x i1> , <2 x i64> %a, <2 x i64> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %sel +; +; AVX-LABEL: 'test_2i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <2 x i1> , <2 x i64> %a, <2 x i64> %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %sel ; %sel = select <2 x i1> , <2 x i64> %a, <2 x i64> %b ret <2 x i64> %sel } define <2 x double> @test_2double(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: 'test_2double' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <2 x i1> , <2 x double> %a, <2 x double> %b -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %sel +; SSE2-LABEL: 'test_2double' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sel = select <2 x i1> , <2 x double> %a, <2 x double> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %sel +; +; SSE41-LABEL: 'test_2double' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <2 x i1> , <2 x double> %a, <2 x double> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %sel +; +; AVX-LABEL: 'test_2double' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <2 x i1> , <2 x double> %a, <2 x double> %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %sel ; %sel = select <2 x i1> , <2 x double> %a, <2 x double> %b ret <2 x double> %sel } define <4 x i32> @test_4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: 'test_4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <4 x i1> , <4 x i32> %a, <4 x i32> %b -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %sel +; SSE2-LABEL: 'test_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sel = select <4 x i1> , <4 x i32> %a, <4 x i32> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %sel +; +; SSE41-LABEL: 'test_4i32' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <4 x i1> , <4 x i32> %a, <4 x i32> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %sel +; +; AVX-LABEL: 'test_4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <4 x i1> , <4 x i32> %a, <4 x i32> %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %sel ; %sel = select <4 x i1> , <4 x i32> %a, <4 x i32> %b ret <4 x i32> %sel } define <4 x float> @test_4float(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: 'test_4float' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <4 x i1> , <4 x float> %a, <4 x float> %b -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %sel +; SSE2-LABEL: 'test_4float' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sel = select <4 x i1> , <4 x float> %a, <4 x float> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %sel +; +; SSE41-LABEL: 'test_4float' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <4 x i1> , <4 x float> %a, <4 x float> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %sel +; +; AVX-LABEL: 'test_4float' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <4 x i1> , <4 x float> %a, <4 x float> %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %sel ; %sel = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %sel } define <16 x i8> @test_16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: 'test_16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <16 x i1> , <16 x i8> %a, <16 x i8> %b -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %sel +; SSE2-LABEL: 'test_16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sel = select <16 x i1> , <16 x i8> %a, <16 x i8> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %sel +; +; SSE41-LABEL: 'test_16i8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <16 x i1> , <16 x i8> %a, <16 x i8> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %sel +; +; AVX-LABEL: 'test_16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <16 x i1> , <16 x i8> %a, <16 x i8> %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %sel ; %sel = select <16 x i1> , <16 x i8> %a, <16 x i8> %b ret <16 x i8> %sel @@ -178,9 +237,13 @@ define <16 x i8> @test_16i8(<16 x i8> %a, <16 x i8> %b) { ; AVX added blend instructions with an immediate for <4 x double> and ; <8 x float>. Integers of the same size should also use those instructions. define <4 x i64> @test_4i64(<4 x i64> %a, <4 x i64> %b) { -; SSE-LABEL: 'test_4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <4 x i1> , <4 x i64> %a, <4 x i64> %b -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %sel +; SSE2-LABEL: 'test_4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sel = select <4 x i1> , <4 x i64> %a, <4 x i64> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %sel +; +; SSE41-LABEL: 'test_4i64' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <4 x i1> , <4 x i64> %a, <4 x i64> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %sel ; ; AVX-LABEL: 'test_4i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <4 x i1> , <4 x i64> %a, <4 x i64> %b @@ -191,9 +254,13 @@ define <4 x i64> @test_4i64(<4 x i64> %a, <4 x i64> %b) { } define <4 x double> @test_4double(<4 x double> %a, <4 x double> %b) { -; SSE-LABEL: 'test_4double' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <4 x i1> , <4 x double> %a, <4 x double> %b -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %sel +; SSE2-LABEL: 'test_4double' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sel = select <4 x i1> , <4 x double> %a, <4 x double> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %sel +; +; SSE41-LABEL: 'test_4double' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <4 x i1> , <4 x double> %a, <4 x double> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %sel ; ; AVX-LABEL: 'test_4double' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <4 x i1> , <4 x double> %a, <4 x double> %b @@ -204,9 +271,13 @@ define <4 x double> @test_4double(<4 x double> %a, <4 x double> %b) { } define <8 x i32> @test_8i32(<8 x i32> %a, <8 x i32> %b) { -; SSE-LABEL: 'test_8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <8 x i1> , <8 x i32> %a, <8 x i32> %b -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %sel +; SSE2-LABEL: 'test_8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sel = select <8 x i1> , <8 x i32> %a, <8 x i32> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %sel +; +; SSE41-LABEL: 'test_8i32' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <8 x i1> , <8 x i32> %a, <8 x i32> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %sel ; ; AVX-LABEL: 'test_8i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <8 x i1> , <8 x i32> %a, <8 x i32> %b @@ -217,9 +288,13 @@ define <8 x i32> @test_8i32(<8 x i32> %a, <8 x i32> %b) { } define <8 x float> @test_8float(<8 x float> %a, <8 x float> %b) { -; SSE-LABEL: 'test_8float' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <8 x i1> , <8 x float> %a, <8 x float> %b -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %sel +; SSE2-LABEL: 'test_8float' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sel = select <8 x i1> , <8 x float> %a, <8 x float> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %sel +; +; SSE41-LABEL: 'test_8float' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <8 x i1> , <8 x float> %a, <8 x float> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %sel ; ; AVX-LABEL: 'test_8float' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <8 x i1> , <8 x float> %a, <8 x float> %b @@ -231,26 +306,50 @@ define <8 x float> @test_8float(<8 x float> %a, <8 x float> %b) { ; AVX2 define <16 x i16> @test_16i16(<16 x i16> %a, <16 x i16> %b) { -; SSE-LABEL: 'test_16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <16 x i1> , <16 x i16> %a, <16 x i16> %b -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %sel +; SSE2-LABEL: 'test_16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sel = select <16 x i1> , <16 x i16> %a, <16 x i16> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %sel +; +; SSE41-LABEL: 'test_16i16' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <16 x i1> , <16 x i16> %a, <16 x i16> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %sel ; -; AVX-LABEL: 'test_16i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <16 x i1> , <16 x i16> %a, <16 x i16> %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %sel +; AVX1-LABEL: 'test_16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sel = select <16 x i1> , <16 x i16> %a, <16 x i16> %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %sel +; +; AVX2-LABEL: 'test_16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <16 x i1> , <16 x i16> %a, <16 x i16> %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %sel +; +; AVX512-LABEL: 'test_16i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <16 x i1> , <16 x i16> %a, <16 x i16> %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %sel ; %sel = select <16 x i1> , <16 x i16> %a, <16 x i16> %b ret <16 x i16> %sel } define <32 x i8> @test_32i8(<32 x i8> %a, <32 x i8> %b) { -; SSE-LABEL: 'test_32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <32 x i1> , <32 x i8> %a, <32 x i8> %b -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %sel +; SSE2-LABEL: 'test_32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sel = select <32 x i1> , <32 x i8> %a, <32 x i8> %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %sel +; +; SSE41-LABEL: 'test_32i8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sel = select <32 x i1> , <32 x i8> %a, <32 x i8> %b +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %sel +; +; AVX1-LABEL: 'test_32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sel = select <32 x i1> , <32 x i8> %a, <32 x i8> %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %sel +; +; AVX2-LABEL: 'test_32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <32 x i1> , <32 x i8> %a, <32 x i8> %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %sel ; -; AVX-LABEL: 'test_32i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <32 x i1> , <32 x i8> %a, <32 x i8> %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %sel +; AVX512-LABEL: 'test_32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <32 x i1> , <32 x i8> %a, <32 x i8> %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %sel ; %sel = select <32 x i1> , <32 x i8> %a, <32 x i8> %b ret <32 x i8> %sel diff --git a/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index 25e164fc1c9..f9ccbf146fc 100644 --- a/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -loop-vectorize -mtriple=x86_64-apple-darwin %s | FileCheck %s +; RUN: opt -S -loop-vectorize -mtriple=x86_64-apple-darwin %s | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt -S -loop-vectorize -mtriple=x86_64-apple-darwin -mattr=+avx %s | FileCheck %s --check-prefixes=CHECK,AVX ; Two mostly identical functions. The only difference is the presence of ; fast-math flags on the second. The loop is a pretty simple reduction: @@ -64,59 +65,85 @@ done: } define double @sumIfVector(double* nocapture readonly %arr) { -; CHECK-LABEL: @sumIfVector( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[ARR:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, double* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[TMP2]] to <2 x double>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[TMP4]], -; CHECK-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 -; CHECK: middle.block: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[PREDPHI]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[PREDPHI]], [[RDX_SHUF]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[BIN_RDX]], i32 0 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 32, 32 -; CHECK-NEXT: br i1 [[CMP_N]], label [[DONE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] -; CHECK-NEXT: [[TOT:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr double, double* [[ARR]], i32 [[I]] -; CHECK-NEXT: [[NEXTVAL:%.*]] = load double, double* [[ADDR]] -; CHECK-NEXT: [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 -; CHECK-NEXT: br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] -; CHECK: do.add: -; CHECK-NEXT: [[TOT_NEW:%.*]] = fadd fast double [[TOT]], [[NEXTVAL]] -; CHECK-NEXT: br label [[NEXT_ITER]] -; CHECK: no.add: -; CHECK-NEXT: br label [[NEXT_ITER]] -; CHECK: next.iter: -; CHECK-NEXT: [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] -; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 -; CHECK-NEXT: [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 -; CHECK-NEXT: br i1 [[AGAIN]], label [[LOOP]], label [[DONE]], !llvm.loop !2 -; CHECK: done: -; CHECK-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret double [[TOT_NEXT_LCSSA]] +; SSE-LABEL: @sumIfVector( +; SSE-NEXT: entry: +; SSE-NEXT: br label [[LOOP:%.*]] +; SSE: loop: +; SSE-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] +; SSE-NEXT: [[TOT:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] +; SSE-NEXT: [[ADDR:%.*]] = getelementptr double, double* [[ARR:%.*]], i32 [[I]] +; SSE-NEXT: [[NEXTVAL:%.*]] = load double, double* [[ADDR]] +; SSE-NEXT: [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 +; SSE-NEXT: br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] +; SSE: do.add: +; SSE-NEXT: [[TOT_NEW:%.*]] = fadd fast double [[TOT]], [[NEXTVAL]] +; SSE-NEXT: br label [[NEXT_ITER]] +; SSE: no.add: +; SSE-NEXT: br label [[NEXT_ITER]] +; SSE: next.iter: +; SSE-NEXT: [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] +; SSE-NEXT: [[I_NEXT]] = add i32 [[I]], 1 +; SSE-NEXT: [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 +; SSE-NEXT: br i1 [[AGAIN]], label [[LOOP]], label [[DONE:%.*]] +; SSE: done: +; SSE-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ] +; SSE-NEXT: ret double [[TOT_NEXT_LCSSA]] +; +; AVX-LABEL: @sumIfVector( +; AVX-NEXT: entry: +; AVX-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; AVX: vector.ph: +; AVX-NEXT: br label [[VECTOR_BODY:%.*]] +; AVX: vector.body: +; AVX-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AVX-NEXT: [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] +; AVX-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 +; AVX-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], +; AVX-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; AVX-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[ARR:%.*]], i32 [[TMP0]] +; AVX-NEXT: [[TMP2:%.*]] = getelementptr double, double* [[TMP1]], i32 0 +; AVX-NEXT: [[TMP3:%.*]] = bitcast double* [[TMP2]] to <4 x double>* +; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP3]], align 8 +; AVX-NEXT: [[TMP4:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD]], +; AVX-NEXT: [[TMP5:%.*]] = fadd fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]] +; AVX-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], +; AVX-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x double> [[TMP5]], <4 x double> [[VEC_PHI]] +; AVX-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; AVX-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 +; AVX-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; AVX: middle.block: +; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[PREDPHI]], <4 x double> undef, <4 x i32> +; AVX-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x double> [[PREDPHI]], [[RDX_SHUF]] +; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> +; AVX-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]] +; AVX-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0 +; AVX-NEXT: [[CMP_N:%.*]] = icmp eq i32 32, 32 +; AVX-NEXT: br i1 [[CMP_N]], label [[DONE:%.*]], label [[SCALAR_PH]] +; AVX: scalar.ph: +; AVX-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; AVX-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; AVX-NEXT: br label [[LOOP:%.*]] +; AVX: loop: +; AVX-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] +; AVX-NEXT: [[TOT:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] +; AVX-NEXT: [[ADDR:%.*]] = getelementptr double, double* [[ARR]], i32 [[I]] +; AVX-NEXT: [[NEXTVAL:%.*]] = load double, double* [[ADDR]] +; AVX-NEXT: [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 +; AVX-NEXT: br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] +; AVX: do.add: +; AVX-NEXT: [[TOT_NEW:%.*]] = fadd fast double [[TOT]], [[NEXTVAL]] +; AVX-NEXT: br label [[NEXT_ITER]] +; AVX: no.add: +; AVX-NEXT: br label [[NEXT_ITER]] +; AVX: next.iter: +; AVX-NEXT: [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] +; AVX-NEXT: [[I_NEXT]] = add i32 [[I]], 1 +; AVX-NEXT: [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 +; AVX-NEXT: br i1 [[AGAIN]], label [[LOOP]], label [[DONE]], !llvm.loop !2 +; AVX: done: +; AVX-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; AVX-NEXT: ret double [[TOT_NEXT_LCSSA]] ; entry: br label %loop diff --git a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll index 5efabe16d32..5b3f5c58c79 100644 --- a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll +++ b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -mcpu=core2 -debug-only=loop-vectorize 2>&1 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -mattr=+sse4.2 -debug-only=loop-vectorize 2>&1 -S | FileCheck %s ; REQUIRES: asserts ; Make sure we use the right select kind when querying select costs. diff --git a/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index 5cee363de16..47e89df5ab6 100644 --- a/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -1,49 +1,93 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s -check-prefixes=CHECK,SSE +; RUN: opt < %s -basicaa -slp-vectorizer -S -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) { -; CHECK-LABEL: @testfunc( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> zeroinitializer, [[TMP0]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[TMP9]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = fcmp olt <2 x float> [[TMP10]], -; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP10]], <2 x float> -; CHECK-NEXT: [[TMP13:%.*]] = fcmp olt <2 x float> [[TMP12]], -; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x float> zeroinitializer, [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP13]], <2 x float> , <2 x float> [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1 -; CHECK-NEXT: [[ADD13]] = fadd float [[TMP16]], [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x float> undef, float [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x float> [[TMP18]], float [[ADD13]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = fcmp olt <2 x float> [[TMP19]], -; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP20]], <2 x float> [[TMP19]], <2 x float> -; CHECK-NEXT: [[TMP22:%.*]] = fcmp olt <2 x float> [[TMP21]], -; CHECK-NEXT: [[TMP23]] = select <2 x i1> [[TMP22]], <2 x float> , <2 x float> [[TMP21]] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void +; SSE-LABEL: @testfunc( +; SSE-NEXT: entry: +; SSE-NEXT: br label [[FOR_BODY:%.*]] +; SSE: for.body: +; SSE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; SSE-NEXT: [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ] +; SSE-NEXT: [[S1_055:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[COND_I40:%.*]], [[FOR_BODY]] ] +; SSE-NEXT: [[S0_054:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[COND_I44:%.*]], [[FOR_BODY]] ] +; SSE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]] +; SSE-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; SSE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; SSE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]] +; SSE-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4 +; SSE-NEXT: [[ADD:%.*]] = fadd float [[S0_054]], [[TMP0]] +; SSE-NEXT: [[ADD3:%.*]] = fadd float [[S1_055]], [[TMP0]] +; SSE-NEXT: [[MUL:%.*]] = fmul float [[S0_054]], 0.000000e+00 +; SSE-NEXT: [[ADD4:%.*]] = fadd float [[MUL]], [[ADD3]] +; SSE-NEXT: [[MUL5:%.*]] = fmul float [[S1_055]], 0.000000e+00 +; SSE-NEXT: [[ADD6:%.*]] = fadd float [[MUL5]], [[ADD]] +; SSE-NEXT: [[CMP_I:%.*]] = fcmp olt float [[ADD6]], 1.000000e+00 +; SSE-NEXT: [[COND_I:%.*]] = select i1 [[CMP_I]], float [[ADD6]], float 1.000000e+00 +; SSE-NEXT: [[CMP_I51:%.*]] = fcmp olt float [[COND_I]], -1.000000e+00 +; SSE-NEXT: [[CMP_I49:%.*]] = fcmp olt float [[ADD4]], 1.000000e+00 +; SSE-NEXT: [[COND_I50:%.*]] = select i1 [[CMP_I49]], float [[ADD4]], float 1.000000e+00 +; SSE-NEXT: [[CMP_I47:%.*]] = fcmp olt float [[COND_I50]], -1.000000e+00 +; SSE-NEXT: [[COND_I_OP:%.*]] = fmul float [[COND_I]], 0.000000e+00 +; SSE-NEXT: [[MUL10:%.*]] = select i1 [[CMP_I51]], float -0.000000e+00, float [[COND_I_OP]] +; SSE-NEXT: [[COND_I50_OP:%.*]] = fmul float [[COND_I50]], 0.000000e+00 +; SSE-NEXT: [[MUL11:%.*]] = select i1 [[CMP_I47]], float -0.000000e+00, float [[COND_I50_OP]] +; SSE-NEXT: [[ADD13]] = fadd float [[MUL10]], [[MUL11]] +; SSE-NEXT: [[CMP_I45:%.*]] = fcmp olt float [[ADD13]], 1.000000e+00 +; SSE-NEXT: [[COND_I46:%.*]] = select i1 [[CMP_I45]], float [[ADD13]], float 1.000000e+00 +; SSE-NEXT: [[CMP_I43:%.*]] = fcmp olt float [[COND_I46]], -1.000000e+00 +; SSE-NEXT: [[COND_I44]] = select i1 [[CMP_I43]], float -1.000000e+00, float [[COND_I46]] +; SSE-NEXT: [[CMP_I41:%.*]] = fcmp olt float [[MUL11]], 1.000000e+00 +; SSE-NEXT: [[COND_I42:%.*]] = select i1 [[CMP_I41]], float [[MUL11]], float 1.000000e+00 +; SSE-NEXT: [[CMP_I39:%.*]] = fcmp olt float [[COND_I42]], -1.000000e+00 +; SSE-NEXT: [[COND_I40]] = select i1 [[CMP_I39]], float -1.000000e+00, float [[COND_I42]] +; SSE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32 +; SSE-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; SSE: for.end: +; SSE-NEXT: ret void +; +; AVX-LABEL: @testfunc( +; AVX-NEXT: entry: +; AVX-NEXT: br label [[FOR_BODY:%.*]] +; AVX: for.body: +; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; AVX-NEXT: [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ] +; AVX-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ] +; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]] +; AVX-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; AVX-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; AVX-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]] +; AVX-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4 +; AVX-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 1 +; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP0]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0 +; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1 +; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP5]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = fmul <2 x float> zeroinitializer, [[TMP0]] +; AVX-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[TMP9]], [[TMP8]] +; AVX-NEXT: [[TMP11:%.*]] = fcmp olt <2 x float> [[TMP10]], +; AVX-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP10]], <2 x float> +; AVX-NEXT: [[TMP13:%.*]] = fcmp olt <2 x float> [[TMP12]], +; AVX-NEXT: [[TMP14:%.*]] = fmul <2 x float> zeroinitializer, [[TMP12]] +; AVX-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP13]], <2 x float> , <2 x float> [[TMP14]] +; AVX-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0 +; AVX-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1 +; AVX-NEXT: [[ADD13]] = fadd float [[TMP16]], [[TMP17]] +; AVX-NEXT: [[TMP18:%.*]] = insertelement <2 x float> undef, float [[TMP17]], i32 0 +; AVX-NEXT: [[TMP19:%.*]] = insertelement <2 x float> [[TMP18]], float [[ADD13]], i32 1 +; AVX-NEXT: [[TMP20:%.*]] = fcmp olt <2 x float> [[TMP19]], +; AVX-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP20]], <2 x float> [[TMP19]], <2 x float> +; AVX-NEXT: [[TMP22:%.*]] = fcmp olt <2 x float> [[TMP21]], +; AVX-NEXT: [[TMP23]] = select <2 x i1> [[TMP22]], <2 x float> , <2 x float> [[TMP21]] +; AVX-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32 +; AVX-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; AVX: for.end: +; AVX-NEXT: ret void ; entry: br label %for.body diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 8432b910d91..271244e54fc 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -677,35 +677,27 @@ define i32 @maxi8_mutiple_uses(i32) { ; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 ; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef -; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef -; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 +; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 ; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> -; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] -; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] -; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> -; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SSE-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SSE-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]] -; SSE-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]] -; SSE-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]] -; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]] -; SSE-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; SSE-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]] -; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]] -; SSE-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4 -; SSE-NEXT: store i32 [[TMP25]], i32* @var, align 8 -; SSE-NEXT: ret i32 [[TMP24]] +; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] +; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4 +; SSE-NEXT: store i32 [[TMP24]], i32* @var, align 8 +; SSE-NEXT: ret i32 [[TMP23]] ; ; AVX-LABEL: @maxi8_mutiple_uses( ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -853,35 +845,25 @@ define i32 @maxi8_wrong_parent(i32) { ; SSE-NEXT: br label [[PP:%.*]] ; SSE: pp: ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef -; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef -; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 +; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 ; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] ; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 ; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> -; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] -; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] -; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> -; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SSE-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SSE-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]] -; SSE-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]] -; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]] -; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]] -; SSE-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]] -; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]] -; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] -; SSE-NEXT: ret i32 [[OP_EXTRA]] +; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] +; SSE-NEXT: ret i32 [[TMP23]] ; ; AVX-LABEL: @maxi8_wrong_parent( ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16