From: Pete Cooper Date: Wed, 6 May 2015 16:39:17 +0000 (+0000) Subject: [ARM] Fast-Isel was incorrectly selecting <2 x double> adds. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=99413f0d403eaaf0be6ab875a6f638a6067fd97f;p=llvm [ARM] Fast-Isel was incorrectly selecting <2 x double> adds. With NEON enabled, we reach SelectBinaryFPOp and are able to get registers for a <2 x double> add. However, we shouldn't actually attempt arithmetic on it, as ARMISelLowering says "v2f64 is legal so that QR subregs can be extracted as f64 elements, but neither Neon nor VFP support any arithmetic operations on it." This commit disables SelectBinaryFPOp for any vector types. There's already a FIXME to try to handle NEON. Doing so would require fixing this conditional, which isn't safe for vectors: 'VT == MVT::f64 || VT == MVT::i64' git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236609 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index dfd8dc5cc2c..97995d31db8 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1794,6 +1794,10 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { if (!FPVT.isSimple()) return false; MVT VT = FPVT.getSimpleVT(); + // FIXME: Support vector types where possible. + if (VT.isVector()) + return false; + // We can get here in the case when we want to use NEON for our fp // operations, but can't figure out how to. Just use the vfp instructions // if we have them. 
diff --git a/test/CodeGen/ARM/fast-isel-vaddd.ll b/test/CodeGen/ARM/fast-isel-vaddd.ll new file mode 100644 index 00000000000..2aa269a9774 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-vaddd.ll @@ -0,0 +1,33 @@ +; RUN: llc %s -o - -verify-machineinstrs -fast-isel=true -mattr=+vfp4 -mattr=+neon | FileCheck %s + +target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" +target triple = "thumbv7s-apple-ios8.0.0" + +%union.DV = type { <2 x double> } + +; Fast-ISel was incorrectly trying to codegen <2 x double> adds and returning only a single vadds +; Check that we generate the 2 vaddd's we expect + +; CHECK: vadd.f64 +; CHECK: vadd.f64 + +define i32 @main(i32 %argc, i8** nocapture readnone %Argv, <2 x double> %tmp31) { +bb: + %Ad = alloca %union.DV, align 16 + %tmp32 = getelementptr inbounds %union.DV, %union.DV* %Ad, i32 0, i32 0 + %tmp33 = fadd <2 x double> %tmp31, %tmp31 + br label %bb37 + +bb37: ; preds = %bb37, %bb + %i.02 = phi i32 [ 0, %bb ], [ %tmp38, %bb37 ] + store <2 x double> %tmp33, <2 x double>* %tmp32, align 16 + %tmp38 = add nuw nsw i32 %i.02, 1 + %exitcond = icmp eq i32 %tmp38, 500000 + br i1 %exitcond, label %bb39, label %bb37 + +bb39: ; preds = %bb37 + call fastcc void @printDV(%union.DV* %Ad) + ret i32 0 +} + +declare hidden fastcc void @printDV(%union.DV* nocapture readonly)