From becaf19d2d9664a3b1aab5be26519a0c92408342 Mon Sep 17 00:00:00 2001
From: Matthias Braun
Date: Tue, 31 Jan 2017 18:37:53 +0000
Subject: [PATCH] InterleaveAccessPass: Avoid constructing invalid shuffle masks

Fix a bug where we would construct shufflevector instructions addressing
invalid elements.

Differential Revision: https://reviews.llvm.org/D29313

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293673 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/InterleavedAccessPass.cpp          |  8 ++++++--
 .../AArch64/interleaved-accesses.ll            | 18 ++++++++++++++++++
 .../ARM/interleaved-accesses.ll                | 18 ++++++++++++++++++
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
index c8f79d7fb71..ec35b3f6449 100644
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -174,7 +174,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
 /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
 /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
 static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
-                               unsigned MaxFactor) {
+                               unsigned MaxFactor, unsigned OpNumElts) {
   unsigned NumElts = Mask.size();
   if (NumElts < 4)
     return false;
@@ -246,6 +246,9 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
 
       if (StartMask < 0)
         break;
+      // We must stay within the vectors; This case can happen with undefs.
+      if (StartMask + LaneLen > OpNumElts*2)
+        break;
     }
 
     // Found an interleaved mask of current factor.
@@ -406,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore(
 
   // Check if the shufflevector is RE-interleave shuffle.
   unsigned Factor;
-  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor))
+  unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements();
+  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
     return false;
 
   DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
diff --git a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
index 702dfdbb81a..2a257d49081 100644
--- a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
+++ b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
@@ -547,3 +547,21 @@ define void @store_general_mask_factor3_negativestart(<12 x i32>* %ptr, <32 x i3
   store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
   ret void
 }
+
+@g = external global <4 x float>
+
+; The following does not give a valid interleaved store
+; NEON-LABEL: define void @no_interleave
+; NEON-NOT: call void @llvm.aarch64.neon.st2
+; NEON: shufflevector
+; NEON: store
+; NEON: ret void
+; NO_NEON-LABEL: define void @no_interleave
+; NO_NEON: shufflevector
+; NO_NEON: store
+; NO_NEON: ret void
+define void @no_interleave(<4 x float> %a0) {
+  %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
+  store <4 x float> %v0, <4 x float>* @g, align 16
+  ret void
+}
diff --git a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
index caaaa21c5a1..21eb8d7a1b0 100644
--- a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
+++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
@@ -626,3 +626,21 @@ define void @store_general_mask_factor3_midstart_pass(<12 x i32>* %ptr, <32 x i3
   store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
   ret void
 }
+
+@g = external global <4 x float>
+
+; The following does not give a valid interleaved store
+; NEON-LABEL: define void @no_interleave
+; NEON-NOT: call void @llvm.arm.neon.vst2
+; NEON: shufflevector
+; NEON: store
+; NEON: ret void
+; NO_NEON-LABEL: define void @no_interleave
+; NO_NEON: shufflevector
+; NO_NEON: store
+; NO_NEON: ret void
+define void @no_interleave(<4 x float> %a0) {
+  %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
+  store <4 x float> %v0, <4 x float>* @g, align 16
+  ret void
+}
-- 
2.40.0