From 1b5ab6391575b85fba209505a6788c389ea4c86c Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Mon, 9 May 2016 23:03:44 +0000 Subject: [PATCH] [LV] Hint at the new loop distribution pragma in optimization remark When we encounter unsafe memory dependencies, loop distribution could help. Even though, the diagnostics is in LAA, it's only currently emitted in the vectorizer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268987 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/LoopAccessAnalysis.cpp | 8 +- .../LoopVectorize/unsafe-dep-remark.ll | 74 +++++++++++++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/LoopVectorize/unsafe-dep-remark.ll diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 6b1c5f22aa3..cb7ee1cb246 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -1694,8 +1694,12 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { << (PtrRtChecking.Need ? "" : " don't") << " need runtime memory checks.\n"); else { - emitAnalysis(LoopAccessReport() << - "unsafe dependent memory operations in loop"); + emitAnalysis( + LoopAccessReport() + << "unsafe dependent memory operations in loop. Use " + "#pragma loop distribute(enable) to allow loop distribution " + "to attempt to isolate the offending operations into a separate " + "loop"); DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); } } diff --git a/test/Transforms/LoopVectorize/unsafe-dep-remark.ll b/test/Transforms/LoopVectorize/unsafe-dep-remark.ll new file mode 100644 index 00000000000..5fff82554f4 --- /dev/null +++ b/test/Transforms/LoopVectorize/unsafe-dep-remark.ll @@ -0,0 +1,74 @@ +; RUN: opt -loop-vectorize -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s + +; ModuleID = '/tmp/kk.c' +source_filename = "/tmp/kk.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +; 1 void success (char *A, char *B, char *C, char *D, char *E, int N) { +; 2 for(int i = 0; i < N; i++) { +; 3 A[i + 1] = A[i] + B[i]; +; 4 C[i] = D[i] * E[i]; +; 5 } +; 6 } + +; CHECK: remark: /tmp/kk.c:3:16: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop + +define void @success(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) !dbg !6 { +entry: + %cmp28 = icmp sgt i32 %N, 0, !dbg !8 + br i1 %cmp28, label %for.body, label %for.cond.cleanup, !dbg !9 + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !11 + %0 = load i8, i8* %arrayidx, align 1, !dbg !11, !tbaa !12 + %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !15 + %1 = load i8, i8* %arrayidx2, align 1, !dbg !15, !tbaa !12 + %add = add i8 %1, %0, !dbg !16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9 + %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !17 + store i8 %add, i8* %arrayidx7, align 1, !dbg !18, !tbaa !12 + %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !19 + %2 = load i8, i8* %arrayidx9, align 1, !dbg !19, !tbaa !12 + %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !20 + %3 = load i8, i8* %arrayidx12, align 1, !dbg !20, !tbaa !12 + %mul = mul i8 %3, %2, !dbg !21 + %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !22 + store i8 %mul, i8* %arrayidx16, align 1, !dbg !23, !tbaa !12 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9 + %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9 + +for.cond.cleanup: ; preds = %for.body, %entry + ret void, !dbg !10 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "/tmp/kk.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"PIC Level", i32 2} +!5 = !{!"clang version 3.9.0 "} +!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 2, column: 20, scope: !6) +!9 = !DILocation(line: 2, column: 3, scope: !6) +!10 = !DILocation(line: 6, column: 1, scope: !6) +!11 = !DILocation(line: 3, column: 16, scope: !6) +!12 = !{!13, !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 3, column: 23, scope: !6) +!16 = !DILocation(line: 3, column: 21, scope: !6) +!17 = !DILocation(line: 3, column: 5, scope: !6) +!18 = !DILocation(line: 3, column: 14, scope: !6) +!19 = !DILocation(line: 4, column: 12, scope: !6) +!20 = !DILocation(line: 4, column: 19, scope: !6) +!21 = !DILocation(line: 4, column: 17, scope: !6) +!22 = !DILocation(line: 4, column: 5, scope: !6) +!23 = !DILocation(line: 4, column: 10, scope: !6) -- 2.50.1