From: David Green Date: Sun, 11 Aug 2019 08:42:57 +0000 (+0000) Subject: [ARM] Permit auto-vectorization using MVE X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6cc3211b706ccf5149d644e45f162e0f7c5aa2f4;p=llvm [ARM] Permit auto-vectorization using MVE With enough codegen complete, we can now correctly report the number and size of vector registers for MVE, allowing auto-vectorization. This also allows FP auto-vectorization for MVE without -Ofast/-ffast-math, due to support for IEEE FP arithmetic and parity between scalar and vector FP behaviour. Patch by David Sherwood. Differential Revision: https://reviews.llvm.org/D63728 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368529 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 52f6ea4a6e2..b966c76c7bf 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -101,9 +101,9 @@ public: /// Floating-point computation using ARMv8 AArch32 Advanced /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD - /// is IEEE-754 compliant, but it's not covered in this target. + /// and Arm MVE are IEEE-754 compliant. 
bool isFPVectorizationPotentiallyUnsafe() { - return !ST->isTargetDarwin(); + return !ST->isTargetDarwin() && !ST->hasMVEFloatOps(); } /// \name Scalar TTI Implementations @@ -126,6 +126,8 @@ public: if (Vector) { if (ST->hasNEON()) return 16; + if (ST->hasMVEIntegerOps()) + return 8; return 0; } @@ -138,6 +140,8 @@ public: if (Vector) { if (ST->hasNEON()) return 128; + if (ST->hasMVEIntegerOps()) + return 128; return 0; } diff --git a/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll b/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll index 369568f6dfa..2a7a2ce6d16 100644 --- a/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll +++ b/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll @@ -1,5 +1,6 @@ ; RUN: opt -mtriple armv7-linux-gnueabihf -loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LINUX ; RUN: opt -mtriple armv8-linux-gnu -loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LINUX +; RUN: opt -mtriple armv8.1.m-none-eabi -mattr=+mve.fp -loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=MVE ; RUN: opt -mtriple armv7-unknwon-darwin -loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=DARWIN ; REQUIRES: asserts @@ -44,6 +45,8 @@ for.end: ; preds = %for.end.loopexit, % ; Floating-point loops need fast-math to be vectorizeable ; LINUX: Checking a loop in "sumf" ; LINUX: Potentially unsafe FP op prevents vectorization +; MVE: Checking a loop in "sumf" +; MVE: We can vectorize this loop! ; DARWIN: Checking a loop in "sumf" ; DARWIN: We can vectorize this loop! 
define void @sumf(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { @@ -110,6 +113,8 @@ for.end: ; preds = %for.end.loopexit, % ; Floating-point loops need fast-math to be vectorizeable ; LINUX: Checking a loop in "redf" ; LINUX: Potentially unsafe FP op prevents vectorization +; MVE: Checking a loop in "redf" +; MVE: We can vectorize this loop! ; DARWIN: Checking a loop in "redf" ; DARWIN: We can vectorize this loop! define float @redf(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) {