From 6e7d96de4f84f9809544eb5129a0a31274f84de3 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 25 Sep 2017 22:07:33 +0000 Subject: [PATCH] Revert r312724 ("[ARM] Remove redundant vcvt patterns."). It leads to some improvements, but also a regression for the simple case, so it's not clearly a good idea. test/CodeGen/ARM/vcvt.ll now has test coverage to show the difference. Ultimately, the right solution is probably to custom-lower fp-to-int conversions, to something like ARMISD::VCVT_F32_S32 plus a bitcast. It's hard to do the right thing when the implicit bitcast isn't visible to DAG transforms. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314169 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrVFP.td | 14 ++++++++++++++ test/CodeGen/ARM/vcvt.ll | 28 ++++++++++++++-------------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 0549a198b5b..362ffedf32c 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1435,6 +1435,9 @@ def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, let Predicates=[HasVFP2, HasDPVFP] in { def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))), (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>; + + def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; } def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, @@ -1452,6 +1455,10 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)), (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>; +def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), + addrmode5:$ptr), + (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; + def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm", @@ -1471,6 +1478,9 @@ def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, let Predicates=[HasVFP2, HasDPVFP] in { def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))), (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>; + + def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; } def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, @@ -1488,6 +1498,10 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)), (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>; +def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), + addrmode5:$ptr), + (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; + def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm", diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll index 884a1a47ae2..5f470d60707 100644 --- a/test/CodeGen/ARM/vcvt.ll +++ b/test/CodeGen/ARM/vcvt.ll @@ -355,9 +355,11 @@ define i32 @multi_sint(double %c, i32* nocapture %p, i32* nocapture %q) { ; CHECK: @ BB#0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.s32.f64 s0, d16 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: str r0, [r2] -; CHECK-NEXT: str r0, [r3] +; CHECK-NEXT: vstr s0, [r2] +; CHECK-NEXT: vcvt.s32.f64 s0, d16 +; CHECK-NEXT: vcvt.s32.f64 s2, d16 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vstr s0, [r3] ; CHECK-NEXT: mov pc, lr %conv = fptosi double %c to i32 store i32 %conv, i32* %p, align 4 @@ -370,9 +372,11 @@ define i32 @multi_uint(double %c, i32* nocapture %p, i32* nocapture %q) { ; CHECK: @ BB#0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.u32.f64 s0, d16 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: str r0, [r2] -; CHECK-NEXT: str r0, [r3] +; CHECK-NEXT: vstr s0, [r2] +; CHECK-NEXT: vcvt.u32.f64 s0, d16 +; CHECK-NEXT: vcvt.u32.f64 s2, d16 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vstr s0, [r3] ; CHECK-NEXT: mov pc, lr %conv = fptoui double %c to i32 store i32 %conv, i32* %p, align 4 @@ -385,8 +389,7 @@ define void @double_to_sint_store(double %c, i32* nocapture %p) { ; CHECK: @ BB#0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.s32.f64 s0, d16 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: str r0, [r2] +; CHECK-NEXT: vstr s0, [r2] ; CHECK-NEXT: mov pc, lr %conv = fptosi double %c to i32 store i32 %conv, i32* %p, align 4 @@ -398,8 +401,7 @@ define void @double_to_uint_store(double %c, i32* nocapture %p) { ; CHECK: @ BB#0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.u32.f64 s0, d16 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: str r0, [r2] +; CHECK-NEXT: vstr s0, [r2] ; CHECK-NEXT: mov pc, lr %conv = fptoui double %c to i32 store i32 %conv, i32* %p, align 4 @@ -411,8 +413,7 @@ define void @float_to_sint_store(float %c, i32* nocapture %p) { ; CHECK: @ BB#0: ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vcvt.s32.f32 s0, s0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: str r0, [r1] +; CHECK-NEXT: vstr s0, [r1] ; CHECK-NEXT: mov pc, lr %conv = fptosi float %c to i32 store i32 %conv, i32* %p, align 4 @@ -424,8 +425,7 @@ define void @float_to_uint_store(float %c, i32* nocapture %p) { ; CHECK: @ BB#0: ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vcvt.u32.f32 s0, s0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: str r0, [r1] +; CHECK-NEXT: vstr s0, [r1] ; CHECK-NEXT: mov pc, lr %conv = fptoui float %c to i32 store i32 %conv, i32* %p, align 4 -- 2.40.0