From: Elena Demikhovsky Date: Mon, 27 Jun 2016 11:19:23 +0000 (+0000) Subject: Fixed consecutive memory access detection in Loop Vectorizer. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1abadbff39dfc5ff5f4d08605368f979e567a2ec;p=llvm Fixed consecutive memory access detection in Loop Vectorizer. It did not handle correctly cases without GEP. The following loop wasn't vectorized: for (int i=0; igetType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -905,9 +905,9 @@ int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = - PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || - isNoWrapAddRec(Ptr, AR, PSE, Lp); + bool IsNoWrapAddRec = !ShouldCheckWrap || + PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || + isNoWrapAddRec(Ptr, AR, PSE, Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { if (Assume) { diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0b3e5562eac..15e8d792c8f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2242,87 +2242,13 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, } int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { - assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); - auto *SE = PSE.getSE(); - // Make sure that the pointer does not point to structs. - if (Ptr->getType()->getPointerElementType()->isAggregateType()) - return 0; - - // If this value is a pointer induction variable, we know it is consecutive. 
- PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr); - if (Phi && Inductions.count(Phi)) { - InductionDescriptor II = Inductions[Phi]; - return II.getConsecutiveDirection(); - } - - GetElementPtrInst *Gep = getGEPInstruction(Ptr); - if (!Gep) - return 0; - - unsigned NumOperands = Gep->getNumOperands(); - Value *GpPtr = Gep->getPointerOperand(); - // If this GEP value is a consecutive pointer induction variable and all of - // the indices are constant, then we know it is consecutive. - Phi = dyn_cast<PHINode>(GpPtr); - if (Phi && Inductions.count(Phi)) { - - // Make sure that the pointer does not point to structs. - PointerType *GepPtrType = cast<PointerType>(GpPtr->getType()); - if (GepPtrType->getElementType()->isAggregateType()) - return 0; - - // Make sure that all of the index operands are loop invariant. - for (unsigned i = 1; i < NumOperands; ++i) - if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) - return 0; - - InductionDescriptor II = Inductions[Phi]; - return II.getConsecutiveDirection(); - } - - unsigned InductionOperand = getGEPInductionOperand(Gep); - - // Check that all of the gep indices are uniform except for our induction - // operand. - for (unsigned i = 0; i != NumOperands; ++i) - if (i != InductionOperand && - !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) - return 0; - // We can emit wide load/stores only if the last non-zero index is the - // induction variable. - const SCEV *Last = nullptr; - if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep)) - Last = PSE.getSCEV(Gep->getOperand(InductionOperand)); - else { - // Because of the multiplication by a stride we can have a s/zext cast. - // We are going to replace this stride by 1 so the cast is safe to ignore. - // - // %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - // %0 = trunc i64 %indvars.iv to i32 - // %mul = mul i32 %0, %Stride1 - // %idxprom = zext i32 %mul to i64 << Safe cast.
- // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom - // - Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(), - Gep->getOperand(InductionOperand), Gep); - if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last)) - Last = - (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend) - ? C->getOperand() - : Last; - } - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) { - const SCEV *Step = AR->getStepRecurrence(*SE); - - // The memory is consecutive because the last index is consecutive - // and all other indices are loop invariant. - if (Step->isOne()) - return 1; - if (Step->isAllOnesValue()) - return -1; - } + const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : + ValueToValueMap(); + int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); + if (Stride == 1 || Stride == -1) + return Stride; return 0; } @@ -2658,7 +2584,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // Handle consecutive loads/stores. GetElementPtrInst *Gep = getGEPInstruction(Ptr); if (ConsecutiveStride) { - if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { + if (Gep && + !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), + OrigLoop)) { setDebugLocFromInst(Builder, Gep); Value *PtrOperand = Gep->getPointerOperand(); Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; @@ -2671,9 +2599,6 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { Ptr = Builder.Insert(Gep2); } else if (Gep) { setDebugLocFromInst(Builder, Gep); - assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), - OrigLoop) && - "Base ptr must be invariant"); // The last index does not have to be the induction. It can be // consecutive and be a function of the index.
For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); @@ -2702,8 +2627,6 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { } Ptr = Builder.Insert(Gep2); } else { // No GEP - // Use the induction element ptr. - assert(isa(Ptr) && "Invalid induction ptr"); setDebugLocFromInst(Builder, Ptr); VectorParts &PtrVal = getVectorValue(Ptr); Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); diff --git a/test/Transforms/LoopVectorize/consec_no_gep.ll b/test/Transforms/LoopVectorize/consec_no_gep.ll new file mode 100644 index 00000000000..4e906bb2659 --- /dev/null +++ b/test/Transforms/LoopVectorize/consec_no_gep.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;; Check consecutive memory access without preceding GEP instruction + +; for (int i=0; i + +define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 { +entry: + %cmp2 = icmp sgt i32 %len, 0 + br i1 %cmp2, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %from.addr.04 = phi float* [ %incdec.ptr, %for.body ], [ %from, %for.body.preheader ] + %to.addr.03 = phi float* [ %incdec.ptr1, %for.body ], [ %to, %for.body.preheader ] + %incdec.ptr = getelementptr inbounds float, float* %from.addr.04, i64 1 + %val = load float, float* %from.addr.04, align 4 + %incdec.ptr1 = getelementptr inbounds float, float* %to.addr.03, i64 1 + store float %val, float* %to.addr.03, align 4 + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %len + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds 
= %for.end.loopexit, %entry + ret void +} diff --git a/test/Transforms/LoopVectorize/consec_no_gep2.ll b/test/Transforms/LoopVectorize/consec_no_gep2.ll new file mode 100644 index 00000000000..628a5b376ff --- /dev/null +++ b/test/Transforms/LoopVectorize/consec_no_gep2.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; CHECK-LABEL: @img2buf +; CHECK: store <4 x i32> +; Function Attrs: nounwind +define void @img2buf(i64 %val, i8* nocapture %buf, i32 %N) local_unnamed_addr #0 { +entry: + br label %l2 + +l2: + br label %for.body57.us + +for.body57.us: + %indvars.iv24 = phi i64 [ %val, %l2 ], [ %indvars.iv.next25, %for.body57.us ] + %0 = trunc i64 %indvars.iv24 to i32 + %add77.us = add i32 5, %0 + %mul78.us = shl nsw i32 %add77.us, 2 + %idx.ext79.us = sext i32 %mul78.us to i64 + %add.ptr80.us = getelementptr inbounds i8, i8* %buf, i64 %idx.ext79.us + %ui32.0.add.ptr80.sroa_cast.us = bitcast i8* %add.ptr80.us to i32* + store i32 0, i32* %ui32.0.add.ptr80.sroa_cast.us, align 1 + %indvars.iv.next25 = add nsw i64 %indvars.iv24, 1 + %lftr.wideiv26 = trunc i64 %indvars.iv.next25 to i32 + %exitcond27 = icmp eq i32 %lftr.wideiv26, %N + br i1 %exitcond27, label %l3, label %for.body57.us + +l3: + ret void +} + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" } + diff --git a/test/Transforms/LoopVectorize/ptr-induction.ll b/test/Transforms/LoopVectorize/ptr-induction.ll index 47d33352763..e0e1139e0eb 100644 --- a/test/Transforms/LoopVectorize/ptr-induction.ll +++ b/test/Transforms/LoopVectorize/ptr-induction.ll @@ -18,6 +18,7 @@ 
while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body %a.pn = phi i32* [ %incdec.ptr8, %while.body ], [ %a, %while.body.preheader ] %acc.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] + %a1.pn = getelementptr inbounds i32, i32* %a.pn, i64 0 %incdec.ptr8 = getelementptr inbounds i32, i32* %a.pn, i64 1 %0 = load i32, i32* %incdec.ptr8, align 1 %add = add nuw nsw i32 %0, %acc.07