From: Simon Pilgrim
Date: Sun, 2 Jun 2019 18:06:42 +0000 (+0000)
Subject: [TTI][X86] Cleanup getMaskedMemoryOpCost. NFCI.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=852d2fed625dd7260dc6c30d1f98d3193975fe7f;p=llvm

[TTI][X86] Cleanup getMaskedMemoryOpCost. NFCI.

Prep work before resurrecting D61257.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362335 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 7501834ea4c..edd63846258 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2346,6 +2346,9 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
 int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
                                       unsigned Alignment,
                                       unsigned AddressSpace) {
+  bool IsLoad = (Instruction::Load == Opcode);
+  bool IsStore = (Instruction::Store == Opcode);
+
   VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
   if (!SrcVTy)
     // To calculate scalar take the regular cost, without mask
@@ -2353,10 +2356,9 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
 
   unsigned NumElem = SrcVTy->getVectorNumElements();
   VectorType *MaskTy =
-    VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
-  if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy)) ||
-      (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy)) ||
-      !isPowerOf2_32(NumElem)) {
+      VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
+  if ((IsLoad && !isLegalMaskedLoad(SrcVTy)) ||
+      (IsStore && !isLegalMaskedStore(SrcVTy)) || !isPowerOf2_32(NumElem)) {
     // Scalarization
     int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
     int ScalarCompareCost = getCmpSelInstrCost(
@@ -2364,8 +2366,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     int BranchCost = getCFInstrCost(Instruction::Br);
     int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
 
-    int ValueSplitCost = getScalarizationOverhead(
-        SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store);
+    int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
     int MemopCost =
         NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
                                          Alignment, AddressSpace);
@@ -2388,11 +2389,13 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     // Expanding requires fill mask with zeroes
     Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
   }
+
+  // Pre-AVX512 - each maskmov costs 4.
   if (!ST->hasAVX512())
-    return Cost + LT.first*4; // Each maskmov costs 4
+    return Cost + LT.first * 4;
 
   // AVX-512 masked load/store is cheapper
-  return Cost+LT.first;
+  return Cost + LT.first;
 }
 
 int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
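
For reference only, and not part of the commit: a minimal standalone C++ sketch of the scalarization fallback cost formula that the hunks above rearrange but leave functionally unchanged. The helper name and the per-element costs passed in main() are made-up placeholders for illustration, not real TTI values.

// Standalone illustration of the scalarization cost model visible in the
// diff: when a masked load/store is not legal for the target, the cost is
// modelled as splitting the mask and value vectors plus a guarded scalar
// memory op per element.
#include <cstdio>

// Hypothetical helper; mirrors only the shape of the in-tree computation.
static int scalarizedMaskedOpCost(int NumElem, int MaskSplitCost,
                                  int ScalarCompareCost, int BranchCost,
                                  int ValueSplitCost, int ScalarMemopCost) {
  // One compare-and-branch guard per vector element.
  int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
  // One scalar load/store per element once the vector is split.
  int MemopCost = NumElem * ScalarMemopCost;
  return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
}

int main() {
  // Placeholder per-element costs for an illegal 8-element masked load.
  printf("cost = %d\n",
         scalarizedMaskedOpCost(/*NumElem=*/8, /*MaskSplitCost=*/8,
                                /*ScalarCompareCost=*/1, /*BranchCost=*/1,
                                /*ValueSplitCost=*/8, /*ScalarMemopCost=*/1));
  return 0;
}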