From: Matthew Simpson Date: Tue, 7 Mar 2017 18:47:30 +0000 (+0000) Subject: [LV] Consider users that are memory accesses in uniforms expansion step X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=30bc56bd3cc28618d687ea44a85ef8148780adb7;p=llvm [LV] Consider users that are memory accesses in uniforms expansion step When expanding the set of uniform instructions beyond the seed instructions (e.g., consecutive pointers), we mark a new instruction uniform if all its loop-varying users are uniform. We should also allow users that are consecutive or interleaved memory accesses. This fixes cases where we have an instruction that is used as the pointer operand of a consecutive access but also used by a non-memory instruction that later becomes uniform as part of the expansion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297179 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 763ce083837..836a38d9813 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5675,7 +5675,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { continue; auto *OI = cast<Instruction>(OV); if (all_of(OI->users(), [&](User *U) -> bool { - return isOutOfScope(U) || Worklist.count(cast<Instruction>(U)); + auto *J = cast<Instruction>(U); + return !TheLoop->contains(J) || Worklist.count(J) || + (OI == getPointerOperand(J) && isUniformDecision(J, VF)); })) { Worklist.insert(OI); DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n"); diff --git a/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 88b2aa36b08..125829090c3 100644 --- a/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -438,3 +438,53 @@ for.end: %tmp5 = phi i32 [ %tmp2, %for.body ] ret i32 %tmp5 } + +; INTER-LABEL: 
bitcast_pointer_operand +; +; Check that a pointer operand having a user other than a memory access is +; recognized as uniform after vectorization. In this test case, %tmp1 is a +; bitcast that is used by a load and a getelementptr instruction (%tmp2). Once +; %tmp2 is marked uniform, %tmp1 should be marked uniform as well. +; +; INTER: LV: Found uniform instruction: %cond = icmp slt i64 %i.next, %n +; INTER-NEXT: LV: Found uniform instruction: %tmp2 = getelementptr inbounds i8, i8* %tmp1, i64 3 +; INTER-NEXT: LV: Found uniform instruction: %tmp6 = getelementptr inbounds i8, i8* %B, i64 %i +; INTER-NEXT: LV: Found uniform instruction: %tmp1 = bitcast i64* %tmp0 to i8* +; INTER-NEXT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds i64, i64* %A, i64 %i +; INTER-NEXT: LV: Found uniform instruction: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] +; INTER-NEXT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 1 +; INTER: vector.body: +; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* %A, i64 [[INDEX]] +; INTER-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <32 x i8>* +; INTER-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP5]], align 1 +; INTER-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24> +; INTER-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <4 x i32> <i32 3, i32 11, i32 19, i32 27> +; INTER-NEXT: [[TMP6:%.*]] = xor <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC]] +; INTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* %B, i64 [[INDEX]] +; INTER-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>* +; INTER-NEXT: store <4 x i8> [[TMP6]], <4 x i8>* [[TMP8]], align 1 +; INTER-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; INTER: br i1 {{.*}}, label %middle.block, label %vector.body +; +define void @bitcast_pointer_operand(i64* %A, i8* %B, i64 %n) { +entry: + br label 
%for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %tmp0 = getelementptr inbounds i64, i64* %A, i64 %i + %tmp1 = bitcast i64* %tmp0 to i8* + %tmp2 = getelementptr inbounds i8, i8* %tmp1, i64 3 + %tmp3 = load i8, i8* %tmp2, align 1 + %tmp4 = load i8, i8* %tmp1, align 1 + %tmp5 = xor i8 %tmp3, %tmp4 + %tmp6 = getelementptr inbounds i8, i8* %B, i64 %i + store i8 %tmp5, i8* %tmp6 + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +}