From: Karl-Johan Karlsson Date: Tue, 14 Feb 2017 08:14:06 +0000 (+0000) Subject: [LoopVectorize] Added address space check when analysing interleaved accesses X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=26a008fb2f3bf3a7ffb717d049276ef9e25d05db;p=llvm [LoopVectorize] Added address space check when analysing interleaved accesses Prevent memory objects of different address spaces to be part of the same load/store groups when analysing interleaved accesses. This is fixing pr31900. Reviewers: HaoLiu, mssimpso, mkuper Reviewed By: mssimpso, mkuper Subscribers: llvm-commits, efriedma, mzolotukhin Differential Revision: https://reviews.llvm.org/D29717 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295038 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 401b942d307..84935d6a408 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5997,6 +5997,12 @@ void InterleavedAccessInfo::analyzeInterleaving( if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size) continue; + // Ignore A if the memory object of A and B don't belong to the same + // address space + if (DesA.Scev->getType()->getPointerAddressSpace() != + DesB.Scev->getType()->getPointerAddressSpace()) + continue; + // Calculate the distance from A to B. const SCEVConstant *DistToB = dyn_cast( PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev)); @@ -6040,35 +6046,35 @@ void InterleavedAccessInfo::analyzeInterleaving( releaseGroup(Group); // Remove interleaved groups with gaps (currently only loads) whose memory - // accesses may wrap around. We have to revisit the getPtrStride analysis, - // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does + // accesses may wrap around. We have to revisit the getPtrStride analysis, + // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does // not check wrapping (see documentation there). - // FORNOW we use Assume=false; - // TODO: Change to Assume=true but making sure we don't exceed the threshold + // FORNOW we use Assume=false; + // TODO: Change to Assume=true but making sure we don't exceed the threshold // of runtime SCEV assumptions checks (thereby potentially failing to - // vectorize altogether). + // vectorize altogether). // Additional optional optimizations: - // TODO: If we are peeling the loop and we know that the first pointer doesn't + // TODO: If we are peeling the loop and we know that the first pointer doesn't // wrap then we can deduce that all pointers in the group don't wrap. - // This means that we can forcefully peel the loop in order to only have to - // check the first pointer for no-wrap. When we'll change to use Assume=true + // This means that we can forcefully peel the loop in order to only have to + // check the first pointer for no-wrap. When we'll change to use Assume=true // we'll only need at most one runtime check per interleaved group. // for (InterleaveGroup *Group : LoadGroups) { // Case 1: A full group. Can Skip the checks; For full groups, if the wide - // load would wrap around the address space we would do a memory access at - // nullptr even without the transformation. - if (Group->getNumMembers() == Group->getFactor()) + // load would wrap around the address space we would do a memory access at + // nullptr even without the transformation. + if (Group->getNumMembers() == Group->getFactor()) continue; - // Case 2: If first and last members of the group don't wrap this implies + // Case 2: If first and last members of the group don't wrap this implies // that all the pointers in the group don't wrap. // So we check only group member 0 (which is always guaranteed to exist), - // and group member Factor - 1; If the latter doesn't exist we rely on + // and group member Factor - 1; If the latter doesn't exist we rely on // peeling (if it is a non-reveresed accsess -- see Case 3). Value *FirstMemberPtr = getPointerOperand(Group->getMember(0)); - if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, + if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " "first group member potentially pointer-wrapping.\n"); diff --git a/test/Transforms/LoopVectorize/AArch64/pr31900.ll b/test/Transforms/LoopVectorize/AArch64/pr31900.ll new file mode 100644 index 00000000000..5ea38a4a246 --- /dev/null +++ b/test/Transforms/LoopVectorize/AArch64/pr31900.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -mtriple=aarch64-apple-ios -loop-vectorize -enable-interleaved-mem-accesses -force-vector-width=2 < %s | FileCheck %s + +; Reproducer for address space fault in the LoopVectorizer (pr31900). Added +; different sized address space pointers (p:16:16-p4:32:16) to the aarch64 +; datalayout to reproduce the fault. + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128-p:16:16-p4:32:16" + +; Check that all the loads are scalarized +; CHECK: load i16, i16* +; CHECK: load i16, i16* +; CHECK: load i16, i16 addrspace(4)* +; CHECK: load i16, i16 addrspace(4)* + +%rec1445 = type { i16, i16, i16, i16, i16 } + +define void @foo() { +bb1: + br label %bb4 + +bb4: + %tmp1 = phi i16 [ undef, %bb1 ], [ %_tmp1013, %bb4 ] + %tmp2 = phi %rec1445* [ undef, %bb1 ], [ %_tmp1015, %bb4 ] + %tmp3 = phi %rec1445 addrspace(4)* [ undef, %bb1 ], [ %_tmp1017, %bb4 ] + %0 = getelementptr %rec1445, %rec1445* %tmp2, i16 0, i32 1 + %_tmp987 = load i16, i16* %0, align 1 + %1 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 0, i32 1 + %_tmp993 = load i16, i16 addrspace(4)* %1, align 1 + %_tmp1013 = add i16 %tmp1, 1 + %_tmp1015 = getelementptr %rec1445, %rec1445* %tmp2, i16 1 + %_tmp1017 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 1 + %_tmp1019 = icmp ult i16 %_tmp1013, 24 + br i1 %_tmp1019, label %bb4, label %bb16 + +bb16: + unreachable +}