From: Matthew Simpson
Date: Mon, 13 Feb 2017 18:02:35 +0000 (+0000)
Subject: Revert "[LV] Extend trunc optimization to all IVs with constant integer steps"
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d1fc5442e7e661e82eacb8a84ee7a2b9646f4598;p=llvm

Revert "[LV] Extend trunc optimization to all IVs with constant integer steps"

This reverts commit r294967. That patch caused execution-time slowdowns in a
few LLVM test-suite tests, as reported by the clang-cmake-aarch64-quick bot.
I'm reverting to investigate.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294973 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5513c7fd62a..401b942d307 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4879,15 +4879,12 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
       // induction variable. Notice that we can only optimize the 'trunc' case
       // because (a) FP conversions lose precision, (b) sext/zext may wrap, and
       // (c) other casts depend on pointer size.
-      if (auto *Trunc = dyn_cast<TruncInst>(CI))
-        if (auto *Phi = dyn_cast<PHINode>(Trunc->getOperand(0))) {
-          auto II = Legal->getInductionVars()->find(Phi);
-          if (II != Legal->getInductionVars()->end())
-            if (II->second.getConstIntStepValue()) {
-              widenIntInduction(Phi, Trunc);
-              break;
-            }
-        }
+      auto ID = Legal->getInductionVars()->lookup(OldInduction);
+      if (isa<TruncInst>(CI) && CI->getOperand(0) == OldInduction &&
+          ID.getConstIntStepValue()) {
+        widenIntInduction(OldInduction, cast<TruncInst>(CI));
+        break;
+      }
 
       /// Vectorize casts.
       Type *DestTy =
@@ -7227,17 +7224,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::Trunc:
   case Instruction::FPTrunc:
   case Instruction::BitCast: {
-    // We optimize the truncation of induction variables having constant
-    // integer steps. The cost of these truncations is the same as the scalar
-    // operation.
-    if (auto *Trunc = dyn_cast<TruncInst>(I))
-      if (auto *Phi = dyn_cast<PHINode>(Trunc->getOperand(0))) {
-        auto II = Legal->getInductionVars()->find(Phi);
-        if (II != Legal->getInductionVars()->end())
-          if (II->second.getConstIntStepValue())
-            return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(),
-                                        Trunc->getSrcTy());
-      }
+    // We optimize the truncation of induction variables. The cost of these is
+    // the same as the scalar operation.
+    if (I->getOpcode() == Instruction::Trunc &&
+        Legal->isInductionVariable(I->getOperand(0)))
+      return TTI.getCastInstrCost(I->getOpcode(), I->getType(),
+                                  I->getOperand(0)->getType());
 
     Type *SrcScalarTy = I->getOperand(0)->getType();
     Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index da20a2a7c4d..85f00b4dfc8 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -773,34 +773,3 @@ for.body:
 exit:
   ret void
 }
-
-; CHECK-LABEL: @non_primary_iv_trunc(
-; CHECK: vector.body:
-; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
-; CHECK: [[TMP3:%.*]] = add i64 %index, 0
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* %a, i64 [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP6]], align 4
-; CHECK-NEXT: %index.next = add i64 %index, 2
-; CHECK: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
-; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
-define void @non_primary_iv_trunc(i32* %a, i64 %n) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
-  %j = phi i64 [ %j.next, %for.body ], [ 0, %entry ]
-  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
-  %tmp1 = trunc i64 %j to i32
-  store i32 %tmp1, i32* %tmp0, align 4
-  %i.next = add nuw nsw i64 %i, 1
-  %j.next = add nuw nsw i64 %j, 2
-  %cond = icmp slt i64 %i.next, %n
-  br i1 %cond, label %for.body, label %for.end
-
-for.end:
-  ret void
-}
diff --git a/test/Transforms/LoopVectorize/reverse_iter.ll b/test/Transforms/LoopVectorize/reverse_iter.ll
index bd057698280..a6e2abda36d 100644
--- a/test/Transforms/LoopVectorize/reverse_iter.ll
+++ b/test/Transforms/LoopVectorize/reverse_iter.ll
@@ -2,8 +2,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-; PR15882: This test ensures that we do not produce wrapping arithmetic when
-; creating constant reverse step vectors.
+; Make sure that the reverse iterators are calculated using 64bit arithmetic, not 32.
 ;
 ; int foo(int n, int *A) {
 ;   int sum;
@@ -14,7 +13,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;
 ;CHECK-LABEL: @foo(
-;CHECK: <i32 0, i32 -1, i32 -2, i32 -3>
+;CHECK:  <i64 0, i64 -1, i64 -2, i64 -3>
 ;CHECK: ret
 define i32 @foo(i32 %n, i32* nocapture %A) {
   %1 = icmp sgt i32 %n, 0
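
Illustration (not part of this commit): with the restored pre-r294967 code, the
trunc optimization fires only when the trunc's operand is the loop's primary
induction variable, per the CI->getOperand(0) == OldInduction check above. A
minimal sketch of that still-optimized case, written in the same style as the
tests in this patch; the function name @primary_iv_trunc is invented here for
illustration:

; The trunc operates directly on the primary IV %i, so the vectorizer can
; widen it as an integer induction instead of emitting a vector truncate.
define void @primary_iv_trunc(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
  %tmp1 = trunc i64 %i to i32
  store i32 %tmp1, i32* %tmp0, align 4
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

By contrast, the removed @non_primary_iv_trunc test truncates %j, a non-primary
IV with constant step 2; handling that case is what r294967 added and what this
revert withdraws.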