From 15a59473b3d4f74a4e8b32f814fe9abb2f81f93e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 9 May 2016 21:14:38 +0000 Subject: [PATCH] [X86][SSE] Improve cost model for i64 vector comparisons on pre-SSE42 targets As discussed on PR24888, until SSE42 we don't have access to PCMPGTQ for v2i64 comparisons, but the cost models don't reflect this, resulting in over-optimistic vectorization. This patch adds SSE2 'base level' costs that match what a typical target is capable of and only reduces the v2i64 costs at SSE42. Technically SSE41 provides a PCMPEQQ v2i64 equality test, but as getCmpSelInstrCost doesn't give us a way to discriminate between comparison test types we can't easily make use of this, otherwise we could split the cost of integer equality and greater-than tests to give better costings of each. Differential Revision: http://reviews.llvm.org/D20057 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268972 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 14 +++++++++++--- test/Analysis/CostModel/X86/cmp.ll | 16 ++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 6ffef1654ea..28ae96d4108 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -857,13 +857,17 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + static const CostTblEntry SSE2CostTbl[] = { + { ISD::SETCC, MVT::v2i64, 8 }, + { ISD::SETCC, MVT::v4i32, 1 }, + { ISD::SETCC, MVT::v8i16, 1 }, + { ISD::SETCC, MVT::v16i8, 1 }, + }; + static const CostTblEntry SSE42CostTbl[] = { { ISD::SETCC, MVT::v2f64, 1 }, { ISD::SETCC, MVT::v4f32, 1 }, { ISD::SETCC, MVT::v2i64, 1 }, - { ISD::SETCC, MVT::v4i32, 1 }, - { ISD::SETCC, MVT::v8i16, 1 }, - { ISD::SETCC, MVT::v16i8, 1 }, }; 
static const CostTblEntry AVX1CostTbl[] = { @@ -906,6 +910,10 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) return LT.first * Entry->Cost; + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll index 80fbbc47089..f4733d6737e 100644 --- a/test/Analysis/CostModel/X86/cmp.ll +++ b/test/Analysis/CostModel/X86/cmp.ll @@ -87,18 +87,18 @@ define i32 @cmp(i32 %arg) { ;AVX: cost of 1 {{.*}} icmp %H = icmp eq <4 x i32> undef, undef - ;SSE2: cost of 1 {{.*}} icmp - ;SSE3: cost of 1 {{.*}} icmp - ;SSSE3: cost of 1 {{.*}} icmp - ;SSE41: cost of 1 {{.*}} icmp + ;SSE2: cost of 8 {{.*}} icmp + ;SSE3: cost of 8 {{.*}} icmp + ;SSSE3: cost of 8 {{.*}} icmp + ;SSE41: cost of 8 {{.*}} icmp ;SSE42: cost of 1 {{.*}} icmp ;AVX: cost of 1 {{.*}} icmp %I = icmp eq <2 x i64> undef, undef - ;SSE2: cost of 2 {{.*}} icmp - ;SSE3: cost of 2 {{.*}} icmp - ;SSSE3: cost of 2 {{.*}} icmp - ;SSE41: cost of 2 {{.*}} icmp + ;SSE2: cost of 16 {{.*}} icmp + ;SSE3: cost of 16 {{.*}} icmp + ;SSSE3: cost of 16 {{.*}} icmp + ;SSE41: cost of 16 {{.*}} icmp ;SSE42: cost of 2 {{.*}} icmp ;AVX1: cost of 4 {{.*}} icmp ;AVX2: cost of 1 {{.*}} icmp -- 2.50.1