From: Roman Tereshin Date: Fri, 18 Jan 2019 20:13:42 +0000 (+0000) Subject: [CGP] Check for existing inttoptr before creating new one X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=61b2398badd5818e69adca67e9b00e0e15acc79d;p=llvm [CGP] Check for existing inttoptr before creating new one Make sure CodeGenPrepare doesn't emit multiple inttoptr instructions of the same integer value while sinking address computations, but rather CSEs them on the fly: excessive inttoptr's confuse SCEV into thinking that related pointers have nothing to do with each other. This problem blocks LoadStoreVectorizer from vectorizing some of the loads / stores in a downstream target. Reviewed By: hfinkel Differential Revision: https://reviews.llvm.org/D56838 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351582 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index c35f8666fa3..0c7c9deaa07 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -4664,13 +4664,22 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // will look through it and provide only the integer value. In that case, // use it here. 
if (!DL->isNonIntegralPointerType(Addr->getType())) { + const auto getResultPtr = [MemoryInst, Addr, + &Builder](Value *Reg) -> Value * { + for (User *U : Reg->users()) + if (auto *I2P = dyn_cast<IntToPtrInst>(U)) + if (I2P->getType() == Addr->getType() && + I2P->getParent() == MemoryInst->getParent()) { + I2P->moveBefore(MemoryInst->getParent()->getFirstNonPHI()); + return I2P; + } + return Builder.CreateIntToPtr(Reg, Addr->getType(), "sunkaddr"); + }; if (!ResultPtr && AddrMode.BaseReg) { - ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), - "sunkaddr"); + ResultPtr = getResultPtr(AddrMode.BaseReg); AddrMode.BaseReg = nullptr; } else if (!ResultPtr && AddrMode.Scale == 1) { - ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), - "sunkaddr"); + ResultPtr = getResultPtr(AddrMode.ScaledReg); AddrMode.Scale = 0; } } diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll new file mode 100644 index 00000000000..5f1fd98cbb7 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll @@ -0,0 +1,40 @@ +; RUN: opt -mtriple=x86_64-- -codegenprepare %s -S -o - | FileCheck %s --check-prefix=CGP +; RUN: opt -mtriple=x86_64-- -codegenprepare -load-store-vectorizer %s -S -o - | FileCheck %s --check-prefix=LSV + +; Make sure CodeGenPrepare doesn't emit multiple inttoptr instructions +; of the same integer value while sinking address computations, but +; rather CSEs them on the fly: excessive inttoptr's confuse SCEV +; into thinking that related pointers have nothing to do with each other. +; +; Triggering this problem involves having just right addressing modes, +; and verifying that the motivating pass (LoadStoreVectorizer) is able +; to benefit from it - just right LSV-policies. Hence the atypical combination +; of the target and datalayout / address spaces in this test. 
+ +target datalayout = "p1:32:32:32" + +define void @main(i32 %tmp, i32 %off) { +; CGP: = inttoptr +; CGP-NOT: = inttoptr +; LSV: = load <2 x float> +; LSV: = load <2 x float> +entry: + %tmp1 = inttoptr i32 %tmp to float addrspace(1)* + %arrayidx.i.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 %off + %add20.i.7 = add i32 %off, 1 + %arrayidx22.i.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 %add20.i.7 + br label %for.body + +for.body: + %tmp8 = phi float [ undef, %entry ], [ %tmp62, %for.body ] + %tmp28 = load float, float addrspace(1)* %arrayidx.i.7 + %tmp29 = load float, float addrspace(1)* %arrayidx22.i.7 + %arrayidx.i321.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 0 + %tmp43 = load float, float addrspace(1)* %arrayidx.i321.7 + %arrayidx22.i327.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 1 + %tmp44 = load float, float addrspace(1)* %arrayidx22.i327.7 + %tmp62 = tail call fast float @foo(float %tmp8, float %tmp44, float %tmp43, float %tmp29, float %tmp28) + br label %for.body +} + +declare float @foo(float, float, float, float, float)