From fa16ddf26fb542808d53aee8c2907e80e95fdd00 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 17 Sep 2019 15:32:28 +0000 Subject: [PATCH] [ARM] Add a SelectTAddrModeImm7 for MVE narrow loads and stores We were previously using the SelectT2AddrModeImm7 for both normal and narrowing MVE loads/stores. As the narrowing instructions do not accept sp as a register, it makes little sense to optimise a FrameIndex into the load, only to have to recover that later on. This adds a SelectTAddrModeImm7 which does not do that folding, and uses it for narrowing load/store patterns. Differential Revision: https://reviews.llvm.org/D67489 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372134 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 24 +++++++++++++++ lib/Target/ARM/ARMInstrMVE.td | 21 ++++++------- test/CodeGen/Thumb2/mve-stack.ll | 47 ++++++++++++++---------------- 3 files changed, 57 insertions(+), 35 deletions(-) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a59a57327d1..8f6515c423e 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -139,6 +139,8 @@ public: bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); + template <unsigned Shift> + bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); // Thumb 2 Addressing Modes: bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); @@ -1151,6 +1153,28 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, return false; } +template <unsigned Shift> +bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, + SDValue &OffImm) { + if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, + RHSC)) { + Base = N.getOperand(0); + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + OffImm = + 
CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); + return true; + } + } + + // Base only. + Base = N; + OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); + return true; +} + //===----------------------------------------------------------------------===// // Thumb 2 Addressing Modes diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td index fdc162eacca..04a4ee73d8b 100644 --- a/lib/Target/ARM/ARMInstrMVE.td +++ b/lib/Target/ARM/ARMInstrMVE.td @@ -160,7 +160,8 @@ class TMemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass { let RenderMethod = "addMemImmOffsetOperands"; } -class taddrmode_imm7<int shift> : MemOperand { +class taddrmode_imm7<int shift> : MemOperand, + ComplexPattern<i32, 2, "SelectTAddrModeImm7<"#shift#">", []> { let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>; // They are printed the same way as the T2 imm8 version let PrintMethod = "printT2AddrModeImm8Operand"; @@ -5157,12 +5158,12 @@ let MinAlignment = 2 in { } let Predicates = [HasMVEInt] in { - def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr), - (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr)>; - def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr), - (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr)>; - def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr), - (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr)>; + def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr), + (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>; + def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr), + (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>; + def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr), + (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>; def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; @@ -5204,9 +5205,9 @@ multiclass MVEExtLoad<string DestLanes, string DestElemBits, } let Predicates = [HasMVEInt] in { - defm : MVEExtLoad<"4", "32", "8", "B", "", t2addrmode_imm7<0>>; - defm : MVEExtLoad<"8", "16", "8", "B", "", t2addrmode_imm7<0>>; - 
defm : MVEExtLoad<"4", "32", "16", "H", "_align2", t2addrmode_imm7<1>>; + defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>; + defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>; + defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>; } diff --git a/test/CodeGen/Thumb2/mve-stack.ll b/test/CodeGen/Thumb2/mve-stack.ll index 0199c6da783..41cc1162ee9 100644 --- a/test/CodeGen/Thumb2/mve-stack.ll +++ b/test/CodeGen/Thumb2/mve-stack.ll @@ -77,10 +77,9 @@ define arm_aapcs_vfpcc void @vstrh32() { ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vmov.i32 q0, #0x6 ; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vmov.i32 q0, #0x6 ; CHECK-NEXT: vstrh.32 q0, [r0, #4] -; CHECK-NEXT: mov r0, sp ; CHECK-NEXT: bl func ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: pop {r7, pc} @@ -101,10 +100,9 @@ define arm_aapcs_vfpcc void @vstrb32() { ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vmov.i32 q0, #0x6 -; CHECK-NEXT: mov r0, sp -; CHECK-NEXT: vstrb.32 q0, [r0, #6] ; CHECK-NEXT: add r0, sp, #4 +; CHECK-NEXT: vmov.i32 q0, #0x6 +; CHECK-NEXT: vstrb.32 q0, [r0, #2] ; CHECK-NEXT: bl func ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: pop {r7, pc} @@ -125,10 +123,9 @@ define arm_aapcs_vfpcc void @vstrb16() { ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vstrb.16 q0, [r0, #2] -; CHECK-NEXT: mov r0, sp ; CHECK-NEXT: bl func ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: pop {r7, pc} @@ -212,16 +209,16 @@ entry: define arm_aapcs_vfpcc <4 x i16> @vldrh32() { ; CHECK-LABEL: vldrh32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl func -; 
CHECK-NEXT: mov r0, sp -; CHECK-NEXT: vldrh.u32 q0, [r0, #4] +; CHECK-NEXT: vldrh.u32 q0, [r4, #4] ; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, pc} entry: %d = alloca [4 x i16], align 2 %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]* %d, i32 0, i32 0 @@ -235,16 +232,16 @@ entry: define arm_aapcs_vfpcc <4 x i8> @vldrb32() { ; CHECK-LABEL: vldrb32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: add r0, sp, #4 +; CHECK-NEXT: add r4, sp, #4 +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl func -; CHECK-NEXT: mov r0, sp -; CHECK-NEXT: vldrb.u32 q0, [r0, #6] +; CHECK-NEXT: vldrb.u32 q0, [r4, #2] ; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, pc} entry: %d = alloca [4 x i8], align 2 %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %d, i32 0, i32 0 @@ -258,16 +255,16 @@ entry: define arm_aapcs_vfpcc <8 x i8> @vldrb16() { ; CHECK-LABEL: vldrb16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl func -; CHECK-NEXT: mov r0, sp -; CHECK-NEXT: vldrb.u16 q0, [r0, #2] +; CHECK-NEXT: vldrb.u16 q0, [r4, #2] ; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, pc} entry: %d = alloca [8 x i8], align 2 %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %d, i32 0, i32 0 -- 2.40.0