if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
return false;
+ // Skip if the vector has f16 elements: even though we could do an i16 vldN,
+ // we can't hold the f16 vectors and will end up converting via f32.
+ if (EltTy->isHalfTy())
+ return false;
+
// A pointer vector cannot be the return type of the ldN intrinsics. We need to
// load integer vectors first and then convert them to pointer vectors.
if (EltTy->isPointerTy())
Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
Ops.push_back(Builder.getInt32(LI->getAlignment()));
+ assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
+
Type *Tys[] = { VecTy, Int8Ptr };
Function *VldnFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
EltIs64Bits)
return false;
+ // Skip if the vector has f16 elements: even though we could do an i16 vldN,
+ // we can't hold the f16 vectors and will end up converting via f32.
+ if (EltTy->isHalfTy())
+ return false;
+
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+ assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
+
Type *Tys[] = { Int8Ptr, SubVecTy };
Function *VstNFunc = Intrinsic::getDeclaration(
SI->getModule(), StoreInts[Factor - 2], Tys);
ret void
}
+define void @load_f16_factor2(<8 x half>* %ptr) {
+; ALL-LABEL: @load_f16_factor2(
+; ALL-NOT: @llvm.arm.neon
+; ALL: ret void
+; Negative test: a factor-2 de-interleaving load of <8 x half> must be left alone, i.e. no NEON vldN intrinsic call may appear in the output.
+ %interleaved.vec = load <8 x half>, <8 x half>* %ptr, align 4
+ %v0 = shufflevector <8 x half> %interleaved.vec, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %v1 = shufflevector <8 x half> %interleaved.vec, <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret void
+}
+
+define void @store_f16_factor2(<8 x half>* %ptr, <4 x half> %v0, <4 x half> %v1) {
+; ALL-LABEL: @store_f16_factor2(
+; ALL-NOT: @llvm.arm.neon
+; ALL: ret void
+; Negative test: a factor-2 interleaving shuffle of two <4 x half> vectors feeding a store must be left alone, i.e. no NEON vstN intrinsic call may appear in the output.
+ %interleaved.vec = shufflevector <4 x half> %v0, <4 x half> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+ store <8 x half> %interleaved.vec, <8 x half>* %ptr, align 4
+ ret void
+}
+
define void @load_illegal_factor2(<3 x float>* %ptr) nounwind {
; ALL-LABEL: @load_illegal_factor2(
; ALL-NOT: @llvm.arm.neon