From f33a6990794fc06d1e54c1cbecca0afa0a3d7d7a Mon Sep 17 00:00:00 2001 From: Ranjeet Singh Date: Tue, 7 Mar 2017 11:17:53 +0000 Subject: [PATCH] [ARM] Reapply r296865 "[ARM] fpscr read/write intrinsics not aware of each other" The original patch r296865 was reverted because it broke the Chromium builds for Android (https://bugs.llvm.org/show_bug.cgi?id=32134). This patch reapplies r296865 with a fix to make sure it doesn't cause the build regression. The problem was that intrinsic selection on int_arm_get_fpscr was failing in ISel. This was because the code to manually select this intrinsic still thought it was the version with no side-effects (INTRINSIC_WO_CHAIN), which is wrong because it doesn't semantically match the definition in the TableGen code, which says it does have side-effects. I've fixed this by updating the intrinsic type to INTRINSIC_W_CHAIN (has side-effects). I've also added a test for this based on Hans's original reproducer. Differential Revision: https://reviews.llvm.org/D30645 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297137 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsARM.td | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 7 +++-- test/CodeGen/ARM/fpscr-intrinsics.ll | 44 ++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/ARM/fpscr-intrinsics.ll diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index 24239689a62..18ed24be56d 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -67,7 +67,7 @@ def int_arm_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, // VFP def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [], []>; def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">, Intrinsic<[], [llvm_i32_ty], []>; def int_arm_vcvtr : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty], diff --git 
a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4b6d0e1a34b..6d735a3bed0 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -4913,9 +4913,10 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // so that the shift + and get folded into a bitfield extract. SDLoc dl(Op); - SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, - DAG.getConstant(Intrinsic::arm_get_fpscr, dl, - MVT::i32)); + SDValue Ops[] = { DAG.getEntryNode(), + DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) }; + + SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops); SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, DAG.getConstant(1U << 22, dl, MVT::i32)); SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, diff --git a/test/CodeGen/ARM/fpscr-intrinsics.ll b/test/CodeGen/ARM/fpscr-intrinsics.ll new file mode 100644 index 00000000000..64b97525feb --- /dev/null +++ b/test/CodeGen/ARM/fpscr-intrinsics.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -O0 -mtriple=armv7-eabi -mcpu=cortex-a8 -mattr=+neon,+fp-armv8 | FileCheck %s +; RUN: llc < %s -O3 -mtriple=armv7-eabi -mcpu=cortex-a8 -mattr=+neon,+fp-armv8 | FileCheck %s + +@a = common global double 0.000000e+00, align 8 + +; Function Attrs: noinline nounwind uwtable +define void @strtod() { +entry: + ; CHECK: vmrs r{{[0-9]+}}, fpscr + %0 = call i32 @llvm.flt.rounds() + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + store double 5.000000e-01, double* @a, align 8 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; Function Attrs: nounwind +define void @fn1(i32* nocapture %p) local_unnamed_addr { +entry: + ; CHECK: vmrs r{{[0-9]+}}, fpscr + %0 = tail call i32 @llvm.arm.get.fpscr() + store i32 %0, i32* %p, align 4 + ; CHECK: vmsr fpscr, r{{[0-9]+}} + tail call void 
@llvm.arm.set.fpscr(i32 1) + ; CHECK: vmrs r{{[0-9]+}}, fpscr + %1 = tail call i32 @llvm.arm.get.fpscr() + %arrayidx1 = getelementptr inbounds i32, i32* %p, i32 1 + store i32 %1, i32* %arrayidx1, align 4 + ret void +} + +; Function Attrs: nounwind readonly +declare i32 @llvm.arm.get.fpscr() + +; Function Attrs: nounwind writeonly +declare void @llvm.arm.set.fpscr(i32) + +; Function Attrs: nounwind +declare i32 @llvm.flt.rounds() -- 2.40.0