From 28a7869cfd1b2188928f448ef54c47c14a20dbf6 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 12 Aug 2019 16:54:07 +0000 Subject: [PATCH] [ARM] MVE shuffle broadcast costs A VDUP will perform a vector broadcast in a single instruction. Update the cost model for MVE accordingly. Code originally by David Sherwood. Differential Revision: https://reviews.llvm.org/D63448 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368589 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetTransformInfo.cpp | 17 +++++++++++++++++ test/Analysis/CostModel/ARM/shuffle.ll | 16 ++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 4e87f254893..f3da1b83765 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -514,6 +514,23 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, return LT.first * Entry->Cost; } } + if (ST->hasMVEIntegerOps()) { + if (Kind == TTI::SK_Broadcast) { + static const CostTblEntry MVEDupTbl[] = { + // VDUP handles these cases. + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}}; + + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + + if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE, + LT.second)) + return LT.first * Entry->Cost; + } + } return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } diff --git a/test/Analysis/CostModel/ARM/shuffle.ll b/test/Analysis/CostModel/ARM/shuffle.ll index 53668b7e29f..78c2dc8f522 100644 --- a/test/Analysis/CostModel/ARM/shuffle.ll +++ b/test/Analysis/CostModel/ARM/shuffle.ll @@ -5,16 +5,16 @@ define void @broadcast() { ; CHECK-MVE-LABEL: 'broadcast' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Unknown cost for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -- 2.40.0