From 330c5d954fa7999f3c2adb32b0513406f143f9c9 Mon Sep 17 00:00:00 2001 From: Dorit Nuzman Date: Tue, 12 Dec 2017 08:57:43 +0000 Subject: [PATCH] [LV] Ignore the cost of values that will not appear in the vectorized loop VecValuesToIgnore holds values that will not appear in the vectorized loop. We should therefore ignore their cost when VF > 1. Differential Revision: https://reviews.llvm.org/D40883 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320463 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +- .../LoopVectorize/X86/reduction-small-size.ll | 80 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/LoopVectorize/X86/reduction-small-size.ll diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index c5d62d0b97d..440641bd91c 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6877,7 +6877,8 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) { continue; // Skip ignored values. 
- if (ValuesToIgnore.count(&I)) + if (ValuesToIgnore.count(&I) || + (VF > 1 && VecValuesToIgnore.count(&I))) continue; VectorizationCostTy C = getInstructionCost(&I, VF); diff --git a/test/Transforms/LoopVectorize/X86/reduction-small-size.ll b/test/Transforms/LoopVectorize/X86/reduction-small-size.ll new file mode 100644 index 00000000000..7c29faa51e6 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/reduction-small-size.ll @@ -0,0 +1,80 @@ +; REQUIRES: asserts +; RUN: opt < %s -loop-vectorize -mcpu=core-avx2 -force-vector-interleave=1 -dce -instcombine -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Make sure we ignore the costs of the redundant reduction casts +; char reduction_i8(char *a, char *b, int n) { +; char sum = 0; +; for (int i = 0; i < n; ++i) +; sum += (a[i] + b[i]); +; return sum; +; } +; + +; CHECK-LABEL: reduction_i8 +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = phi +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = phi +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = getelementptr +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = load +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = zext i8 %{{.*}} to i32 +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = getelementptr +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = load +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = zext i8 %{{.*}} to i32 +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = and i32 %{{.*}}, 255 +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = add +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: 
%{{.*}} = add +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = add +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = trunc +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: %{{.*}} = icmp +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 1 For instruction: br +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = phi +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = phi +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = getelementptr +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = load +; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = zext i8 %{{.*}} to i32 +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = getelementptr +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = load +; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = zext i8 %{{.*}} to i32 +; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = and i32 %{{.*}}, 255 +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = add +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = add +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = add +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = trunc +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: %{{.*}} = icmp +; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction: br +; +define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) { +entry: + %cmp.12 = icmp sgt i32 %n, 0 + br i1 %cmp.12, label 
%for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + br label %for.body + +for.cond.for.cond.cleanup_crit_edge: + %add5.lcssa = phi i32 [ %add5, %for.body ] + %conv6 = trunc i32 %add5.lcssa to i8 + br label %for.cond.cleanup + +for.cond.cleanup: + %sum.0.lcssa = phi i8 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ] + ret i8 %sum.0.lcssa + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv + %1 = load i8, i8* %arrayidx2, align 1 + %conv3 = zext i8 %1 to i32 + %conv4 = and i32 %sum.013, 255 + %add = add nuw nsw i32 %conv, %conv4 + %add5 = add nuw nsw i32 %add, %conv3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body +} -- 2.50.1