From 94fdc9b458036f430370fc89713bd3dd4b7d82e8 Mon Sep 17 00:00:00 2001 From: Matthew Simpson Date: Fri, 7 Jul 2017 16:15:05 +0000 Subject: [PATCH] [ARM] Implement interleaved access bug fix from r306334 r306334 fixed a bug in AArch64 dealing with wide interleaved accesses having pointer types. The bug also exists in ARM, so this patch copies over the fix. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307409 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 4 ++- .../ARM/interleaved-accesses.ll | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d9319cb63a1..4ff9f79898f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -13779,7 +13779,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SV->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); SubVecs[SV].push_back(SubVec); } diff --git a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll index 5938f9d7321..715c9413a81 100644 --- a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll +++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll @@ -854,3 +854,32 @@ define void @load_factor2_fp128(<4 x fp128>* %ptr) { %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> ret void } + +define void @load_factor2_wide_pointer(<16 x i32*>* %ptr) { +; NEON-LABEL: @load_factor2_wide_pointer( +; NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i32*>* %ptr to i32* +; NEON-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8* +; NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP2]], i32 4) +; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1 +; NEON-NEXT: [[TMP4:%.*]] = inttoptr <4 x i32> [[TMP3]] to <4 x i32*> +; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0 +; NEON-NEXT: [[TMP6:%.*]] = inttoptr <4 x i32> [[TMP5]] to <4 x i32*> +; NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8 +; NEON-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +; NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP8]], i32 4) +; NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1 +; NEON-NEXT: [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x i32*> +; NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0 +; NEON-NEXT: [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x i32*> +; NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32*> [[TMP4]], <4 x i32*> [[TMP10]], <8 x i32> +; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> [[TMP12]], <8 x i32> +; NEON-NEXT: ret void +; NO_NEON-LABEL: @load_factor2_wide_pointer( +; NO_NEON-NOT: @llvm.arm.neon +; NO_NEON: ret void +; + %interleaved.vec = load <16 x i32*>, <16 x i32*>* %ptr, align 4 + %v0 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> + %v1 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> + ret void +} -- 2.50.0