From: Hans Wennborg Date: Wed, 1 Feb 2017 19:41:46 +0000 (+0000) Subject: Merging r293673: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f951cb196ce3b8ee4806e855a5e05e5235292a1d;p=llvm Merging r293673: ------------------------------------------------------------------------ r293673 | matze | 2017-01-31 10:37:53 -0800 (Tue, 31 Jan 2017) | 6 lines InterleaveAccessPass: Avoid constructing invalid shuffle masks Fix a bug where we would construct shufflevector instructions addressing invalid elements. Differential Revision: https://reviews.llvm.org/D29313 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_40@293805 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp index c8f79d7fb71..ec35b3f6449 100644 --- a/lib/CodeGen/InterleavedAccessPass.cpp +++ b/lib/CodeGen/InterleavedAccessPass.cpp @@ -174,7 +174,7 @@ static bool isDeInterleaveMask(ArrayRef Mask, unsigned &Factor, /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...> /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7> static bool isReInterleaveMask(ArrayRef Mask, unsigned &Factor, - unsigned MaxFactor) { + unsigned MaxFactor, unsigned OpNumElts) { unsigned NumElts = Mask.size(); if (NumElts < 4) return false; @@ -246,6 +246,9 @@ static bool isReInterleaveMask(ArrayRef Mask, unsigned &Factor, if (StartMask < 0) break; + // We must stay within the vectors; This case can happen with undefs. + if (StartMask + LaneLen > OpNumElts*2) + break; } // Found an interleaved mask of current factor. @@ -406,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore( // Check if the shufflevector is RE-interleave shuffle. unsigned Factor; - if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor)) + unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements(); + if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts)) return false; DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); diff --git a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll index 702dfdbb81a..2a257d49081 100644 --- a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll +++ b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll @@ -547,3 +547,21 @@ define void @store_general_mask_factor3_negativestart(<12 x i32>* %ptr, <32 x i3 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4 ret void } + +@g = external global <4 x float> + +; The following does not give a valid interleaved store +; NEON-LABEL: define void @no_interleave +; NEON-NOT: call void @llvm.aarch64.neon.st2 +; NEON: shufflevector +; NEON: store +; NEON: ret void +; NO_NEON-LABEL: define void @no_interleave +; NO_NEON: shufflevector +; NO_NEON: store +; NO_NEON: ret void +define void @no_interleave(<4 x float> %a0) { + %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> + store <4 x float> %v0, <4 x float>* @g, align 16 + ret void +} diff --git a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll index caaaa21c5a1..21eb8d7a1b0 100644 --- a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll +++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll @@ -626,3 +626,21 @@ define void @store_general_mask_factor3_midstart_pass(<12 x i32>* %ptr, <32 x i3 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4 ret void } + +@g = external global <4 x float> + +; The following does not give a valid interleaved store +; NEON-LABEL: define void @no_interleave +; NEON-NOT: call void @llvm.arm.neon.vst2 +; NEON: shufflevector +; NEON: store +; NEON: ret void +; NO_NEON-LABEL: define void @no_interleave +; NO_NEON: shufflevector +; NO_NEON: store +; NO_NEON: ret void +define void @no_interleave(<4 x float> %a0) { + %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> + store <4 x float> %v0, <4 x float>* @g, align 16 + ret void +}