From 1ae9ed698a7361047aa36d785903cf1f36444f1b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 31 Jul 2018 14:02:43 +0000 Subject: [PATCH] [SLP] Fix PR38339: Instruction does not dominate all uses! Summary: If the ExtractElement instructions can be optimized out during the vectorization and we need to reshuffle the parent vector, this ShuffleInstruction may be inserted in the wrong place causing compiler to produce incorrect code. Reviewers: spatel, RKSimon, mkuper, hfinkel, javed.absar Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D49928 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338380 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 ++++ .../SLPVectorizer/AArch64/PR38339.ll | 29 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 test/Transforms/SLPVectorizer/AArch64/PR38339.ll diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1ea12a38834..5c2efe885e2 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // TODO: Merge this shuffle with the ReorderShuffleMask. if (!E->ReorderIndices.empty()) Builder.SetInsertPoint(VL0); + else if (auto *I = dyn_cast(V)) + Builder.SetInsertPoint(I->getParent(), + std::next(I->getIterator())); + else + Builder.SetInsertPoint(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); } diff --git a/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/test/Transforms/SLPVectorizer/AArch64/PR38339.ll new file mode 100644 index 00000000000..1ab4a13260e --- /dev/null +++ b/test/Transforms/SLPVectorizer/AArch64/PR38339.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -S -mtriple=aarch64-apple-ios -mcpu=cyclone -o - %s | FileCheck %s + +define void @f1(<2 x i16> %x, i16* %a) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> undef, <4 x i32> +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0 +; CHECK-NEXT: store i16 [[TMP1]], i16* [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP2]], align 2 +; CHECK-NEXT: ret void +; + %t2 = extractelement <2 x i16> %x, i32 0 + %t3 = extractelement <2 x i16> %x, i32 1 + %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 + %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 + %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 + %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3 + store i16 %t2, i16* %a + store i16 %t2, i16* %ptr0 + store i16 %t3, i16* %ptr1 + store i16 %t3, i16* %ptr2 + store i16 %t2, i16* %ptr3 + ret void +} -- 2.50.1