From 42df88e1f834d4185530360cf27c1aad6e9e7551 Mon Sep 17 00:00:00 2001 From: Matthew Simpson Date: Mon, 10 Apr 2017 18:34:37 +0000 Subject: [PATCH] [ARM/AArch64] Ensure valid vector element types for interleaved accesses This patch refactors and strengthens the type checks performed for interleaved accesses. The primary functional change is to ensure that the interleaved accesses have valid element types. The added test cases previously failed because the element type is f128. Differential Revision: https://reviews.llvm.org/D31817 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299864 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 30 ++++++++-- lib/Target/AArch64/AArch64ISelLowering.h | 11 ++++ .../AArch64/AArch64TargetTransformInfo.cpp | 8 +-- lib/Target/ARM/ARMISelLowering.cpp | 56 +++++++++++-------- lib/Target/ARM/ARMISelLowering.h | 11 ++++ lib/Target/ARM/ARMTargetTransformInfo.cpp | 9 ++- .../AArch64/interleaved-accesses.ll | 14 +++++ .../ARM/interleaved-accesses.ll | 11 ++++ 8 files changed, 111 insertions(+), 39 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 7dd62f78a8f..504cb5615b6 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7244,11 +7244,31 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType, /// A helper function for determining the number of interleaved accesses we /// will generate when lowering accesses of the given type. -static unsigned getNumInterleavedAccesses(VectorType *VecTy, - const DataLayout &DL) { +unsigned +AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy, + const DataLayout &DL) const { return (DL.getTypeSizeInBits(VecTy) + 127) / 128; } +bool AArch64TargetLowering::isLegalInterleavedAccessType( + VectorType *VecTy, const DataLayout &DL) const { + + unsigned VecSize = DL.getTypeSizeInBits(VecTy); + unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType()); + + // Ensure the number of vector elements is greater than 1. + if (VecTy->getNumElements() < 2) + return false; + + // Ensure the element type is legal. + if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64) + return false; + + // Ensure the total vector size is 64 or a multiple of 128. Types larger than + // 128 will be split into multiple interleaved accesses. + return VecSize == 64 || VecSize % 128 == 0; +} + /// \brief Lower an interleaved load into a ldN intrinsic. /// /// E.g. Lower an interleaved load (Factor = 2): @@ -7272,12 +7292,11 @@ bool AArch64TargetLowering::lowerInterleavedLoad( const DataLayout &DL = LI->getModule()->getDataLayout(); VectorType *VecTy = Shuffles[0]->getType(); - unsigned VecSize = DL.getTypeSizeInBits(VecTy); // Skip if we do not have NEON and skip illegal vector types. We can // "legalize" wide vector types into multiple interleaved accesses as long as // the vector types are divisible by 128. - if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize % 128 != 0)) + if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL)) return false; unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL); @@ -7402,12 +7421,11 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, LaneLen); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // Skip if we do not have NEON and skip illegal vector types. We can // "legalize" wide vector types into multiple interleaved accesses as long as // the vector types are divisible by 128. - if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize % 128 != 0)) + if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL)) return false; unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL); diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 925112f7578..2ad6c8b23df 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -441,6 +441,17 @@ public: /// Returns the size of the platform's va_list object. unsigned getVaListSizeInBits(const DataLayout &DL) const override; + /// Returns true if \p VecTy is a legal interleaved access type. This + /// function checks the vector element type and the overall width of the + /// vector. + bool isLegalInterleavedAccessType(VectorType *VecTy, + const DataLayout &DL) const; + + /// Returns the number of interleaved accesses that will be generated when + /// lowering accesses of the given type. + unsigned getNumInterleavedAccesses(VectorType *VecTy, + const DataLayout &DL) const; + private: bool isExtFreeImpl(const Instruction *Ext) const override; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 77ca4c6b087..531cb9790d3 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -505,14 +505,14 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { unsigned NumElts = VecTy->getVectorNumElements(); - Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); + auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); // ldN/stN only support legal vector types of size 64 or 128 in bits. // Accesses having vector types that are a multiple of 128 bits can be // matched to more than one ldN/stN instruction. - if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize % 128 == 0)) - return Factor * ((SubVecSize + 127) / 128); + if (NumElts % Factor == 0 && + TLI->isLegalInterleavedAccessType(SubVecTy, DL)) + return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL); } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 93eed9160a0..e697c8ca533 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -13593,11 +13593,37 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, /// A helper function for determining the number of interleaved accesses we /// will generate when lowering accesses of the given type. -static unsigned getNumInterleavedAccesses(VectorType *VecTy, - const DataLayout &DL) { +unsigned +ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy, + const DataLayout &DL) const { return (DL.getTypeSizeInBits(VecTy) + 127) / 128; } +bool ARMTargetLowering::isLegalInterleavedAccessType( + VectorType *VecTy, const DataLayout &DL) const { + + unsigned VecSize = DL.getTypeSizeInBits(VecTy); + unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType()); + + // Ensure the vector doesn't have f16 elements. Even though we could do an + // i16 vldN, we can't hold the f16 vectors and will end up converting via + // f32. + if (VecTy->getElementType()->isHalfTy()) + return false; + + // Ensure the number of vector elements is greater than 1. + if (VecTy->getNumElements() < 2) + return false; + + // Ensure the element type is legal. + if (ElSize != 8 && ElSize != 16 && ElSize != 32) + return false; + + // Ensure the total vector size is 64 or a multiple of 128. Types larger than + // 128 will be split into multiple interleaved accesses. + return VecSize == 64 || VecSize % 128 == 0; +} + /// \brief Lower an interleaved load into a vldN intrinsic. /// /// E.g. Lower an interleaved load (Factor = 2): @@ -13622,20 +13648,11 @@ bool ARMTargetLowering::lowerInterleavedLoad( Type *EltTy = VecTy->getVectorElementType(); const DataLayout &DL = LI->getModule()->getDataLayout(); - unsigned VecSize = DL.getTypeSizeInBits(VecTy); - bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip if we do not have NEON and skip illegal vector types and vector types - // with i64/f64 elements (vldN doesn't support i64/f64 elements). We can + // Skip if we do not have NEON and skip illegal vector types. We can // "legalize" wide vector types into multiple interleaved accesses as long as // the vector types are divisible by 128. - if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize % 128 != 0) || - EltIs64Bits) - return false; - - // Skip if the vector has f16 elements: even though we could do an i16 vldN, - // we can't hold the f16 vectors and will end up converting via f32. - if (EltTy->isHalfTy()) + if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL)) return false; unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL); @@ -13765,20 +13782,11 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, LaneLen); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); - bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip if we do not have NEON and skip illegal vector types and vector types - // with i64/f64 elements (vldN doesn't support i64/f64 elements). We can + // Skip if we do not have NEON and skip illegal vector types. We can // "legalize" wide vector types into multiple interleaved accesses as long as // the vector types are divisible by 128. - if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize % 128 != 0) || - EltIs64Bits) - return false; - - // Skip if the vector has f16 elements: even though we could do an i16 vldN, - // we can't hold the f16 vectors and will end up converting via f32. - if (EltTy->isHalfTy()) + if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL)) return false; unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 198000e5b5b..70a0b1380ec 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -530,6 +530,17 @@ class InstrItineraryData; CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const; CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const; + /// Returns true if \p VecTy is a legal interleaved access type. This + /// function checks the vector element type and the overall width of the + /// vector. + bool isLegalInterleavedAccessType(VectorType *VecTy, + const DataLayout &DL) const; + + /// Returns the number of interleaved accesses that will be generated when + /// lowering accesses of the given type. + unsigned getNumInterleavedAccesses(VectorType *VecTy, + const DataLayout &DL) const; + protected: std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index ea923c53b00..f2662a68b18 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -529,15 +529,14 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); - Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); + auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); // vldN/vstN only support legal vector types of size 64 or 128 in bits. // Accesses having vector types that are a multiple of 128 bits can be // matched to more than one vldN/vstN instruction. - if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize % 128 == 0) && - !VecTy->getScalarType()->isHalfTy()) - return Factor * ((SubVecSize + 127) / 128); + if (NumElts % Factor == 0 && + TLI->isLegalInterleavedAccessType(SubVecTy, DL)) + return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL); } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, diff --git a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll index 779edb56751..a038fd1a411 100644 --- a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll +++ b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll @@ -760,3 +760,17 @@ define void @store_factor4_wide(<32 x i32>* %ptr, <8 x i32> %v0, <8 x i32> %v1, store <32 x i32> %interleaved.vec, <32 x i32>* %ptr, align 4 ret void } + +define void @load_factor2_fp128(<4 x fp128>* %ptr) { +; NEON-LABEL: @load_factor2_fp128( +; NEON-NOT: @llvm.aarch64.neon +; NEON: ret void +; NO_NEON-LABEL: @load_factor2_fp128( +; NO_NEON-NOT: @llvm.aarch64.neon +; NO_NEON: ret void +; + %interleaved.vec = load <4 x fp128>, <4 x fp128>* %ptr, align 16 + %v0 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> + %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> + ret void +} diff --git a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll index 455ed12c663..5938f9d7321 100644 --- a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll +++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll @@ -843,3 +843,14 @@ define void @store_factor4_wide(<32 x i32>* %ptr, <8 x i32> %v0, <8 x i32> %v1, store <32 x i32> %interleaved.vec, <32 x i32>* %ptr, align 4 ret void } + +define void @load_factor2_fp128(<4 x fp128>* %ptr) { +; ALL-LABEL: @load_factor2_fp128( +; ALL-NOT: @llvm.arm.neon +; ALL: ret void +; + %interleaved.vec = load <4 x fp128>, <4 x fp128>* %ptr, align 16 + %v0 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> + %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> + ret void +} -- 2.40.0