let isCodeGenOnly = 0; // So we get asm matcher for it.
let AsmString = asm;
let isPseudo = 1;
+ let hasNoSchedulingInfo = 1;
}
class ARMAsmPseudo<string asm, dag iops, dag oops = (outs)>
/// the function. The first operand is the ID# for this instruction, the second
/// is the index into the MachineConstantPool that this is, the third is the
/// size in bytes of this constant pool entry.
-let hasSideEffects = 0, isNotDuplicable = 1 in
+let hasSideEffects = 0, isNotDuplicable = 1, hasNoSchedulingInfo = 1 in
def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
let hasSideEffects = 1;
}
-let usesCustomInserter = 1, Defs = [CPSR] in {
-
-// Pseudo instruction that combines movs + predicated rsbmi
-// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
+ // Pseudo instruction that combines movs + predicated rsbmi
+ // to implement integer ABS
def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
}
-let usesCustomInserter = 1, Defs = [CPSR] in {
+let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
NoItinerary,
def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
-let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
+let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP], hasNoSchedulingInfo = 1 in
def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
-let usesCustomInserter = 1, Defs = [CPSR] in
+let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in
def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary,
[(win__dbzchk tGPR:$divisor)]>;
def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
ComplexDeprecationPredicate<"IT">;
-let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
+let mayLoad = 1, mayStore =1, hasSideEffects = 1, hasNoSchedulingInfo = 1 in
def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
NoItinerary,
[(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>;
let hasSideEffects = 1;
let Size = 0;
let AsmString = "@ COMPILER BARRIER";
+ let hasNoSchedulingInfo = 1;
}
// Pseudo isntruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in {
+let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
NoItinerary, []>, Requires<[IsThumb2]>;
}
: t2PseudoInst<(outs ), (ins pclabel:$cp), 0, NoItinerary, []> {
let isTerminator = 1;
let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+ let hasNoSchedulingInfo = 1;
}
def t2BFi : t2BF<(ins bflabel_u4:$b_label, bflabel_s16:$label, pred:$p),
// However, there is no UAL syntax for them, so we keep them around for
// (dis)assembly only.
multiclass vfp_ldstx_mult<string asm, bit L_bit> {
- let Predicates = [HasFPRegs] in {
+ let Predicates = [HasFPRegs], hasNoSchedulingInfo = 1 in {
// Unknown precision
def XIA :
AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
let PostRAScheduler = 1;
let CompleteModel = 0;
+ let UnsupportedFeatures = [IsARM, HasNEON, HasDotProd, HasZCZ, HasMVEInt,
+ IsNotMClass, HasDPVFP, HasFPARMv8, HasFullFP16, Has8MSecExt, HasV8,
+ HasV8_3a, HasTrustZone, HasDFB, IsWindows];
}
def : M4UnitL2<WriteDIV>;
def : M4UnitL2I<(instregex "(t|t2)LDM")>;
+def : M4UnitL2I<(instregex "(t|t2)LDR")>;
// Stores we use a latency of 1 as they have no outputs
def : M4UnitL1<WritePreLd>;
def : M4UnitL1I<(instregex "(t|t2)MOV")>;
def : M4UnitL1I<(instrs COPY)>;
-def : M4UnitL1I<(instregex "t2IT")>;
-def : M4UnitL1I<(instregex "t2SEL", "t2USAD8",
- "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>;
+def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>;
+def : M4UnitL1I<(instregex "t2CLREX")>;
+def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", "t2SML[AS]",
+ "t2(S|Q|SH|U|UQ|UH|QD)(ADD|ASX|SAX|SUB)", "t2USADA8", "(t|t2)REV")>;
+
+// These instructions are not of much interest to scheduling as they will not
+// be generated or it is not very useful to schedule them. They are here to make
+// the model more complete.
+def : M4UnitL1I<(instregex "t2CDP", "t2LDC", "t2MCR", "t2MRC", "t2MRRC", "t2STC")>;
+def : M4UnitL1I<(instregex "tCPS", "t2ISB", "t2DSB", "t2DMB", "t2?HINT$")>;
+def : M4UnitL1I<(instregex "t2?UDF$", "tBKPT", "t2DBG")>;
+def : M4UnitL1I<(instregex "t?2?Int_eh_sjlj_", "tADDframe", "t?ADJCALL")>;
+def : M4UnitL1I<(instregex "CMP_SWAP", "JUMPTABLE", "MEMCPY")>;
+def : M4UnitL1I<(instregex "VSETLNi32", "VGETLNi32")>;
def : ReadAdvance<ReadALU, 0>;
def : ReadAdvance<ReadALUsr, 0>;
def : M4UnitL1<WriteVST2>;
def : M4UnitL1<WriteVST3>;
def : M4UnitL1<WriteVST4>;
+def : M4UnitL1I<(instregex "VMOVS", "FCONSTS", "VCMP", "VNEG", "VABS")>;
+def : M4UnitL2I<(instregex "VMOVD")>;
+def : M4UnitL1I<(instregex "VMRS", "VMSR", "FMSTAT")>;
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 0>;
; CHECK: smultt
; CHECK: smlalbb
; CHECK: smultt
-; CHECK: smlaldx
; CHECK: smlalbb
-; CHECK: smlaldx
; CHECK: smultt
; CHECK: smlalbb
-; CHECK: smlaldx
; CHECK: smultt
+; CHECK: smlaldx
+; CHECK: smlaldx
+; CHECK: smlaldx
; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
define dso_local arm_aapcscc void @complex_dot_prod(i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i32* nocapture %realResult, i32* nocapture %imagResult) {
entry:
; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-LE-NEXT: ldr lr, [r3, #2]!
; CHECK-LE-NEXT: ldr r4, [r2, #2]!
-; CHECK-LE-NEXT: subs r0, #1
; CHECK-LE-NEXT: sxtah r1, r1, lr
+; CHECK-LE-NEXT: subs r0, #1
; CHECK-LE-NEXT: smlad r12, r4, lr, r12
; CHECK-LE-NEXT: bne .LBB0_2
; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: .LBB0_2: @ %for.body
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ldrsh lr, [r3, #2]!
-; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
-; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
+; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
+; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
-; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT: smlabb r5, r5, lr, r12
; CHECK-BE-NEXT: add r1, lr
+; CHECK-BE-NEXT: subs r0, #1
+; CHECK-BE-NEXT: smlabb r12, r6, r4, r5
; CHECK-BE-NEXT: bne .LBB0_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: add.w r0, r12, r1
; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: mul r1, lr, r1
; CHECK-BE-NEXT: bne .LBB1_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB2_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT: subs r2, #2
+; CHECK-LE-NEXT: sub.w lr, r2, #2
; CHECK-LE-NEXT: subs r3, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: .p2align 2
; CHECK-LE-NEXT: .LBB2_2: @ %for.body
; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-LE-NEXT: ldr r4, [r2, #2]!
-; CHECK-LE-NEXT: ldr lr, [r3, #2]!
-; CHECK-LE-NEXT: asrs r5, r4, #16
-; CHECK-LE-NEXT: smlad r12, r4, lr, r12
+; CHECK-LE-NEXT: ldr r2, [lr, #2]!
+; CHECK-LE-NEXT: ldr r4, [r3, #2]!
+; CHECK-LE-NEXT: asrs r5, r2, #16
+; CHECK-LE-NEXT: smlad r12, r2, r4, r12
; CHECK-LE-NEXT: subs r0, #1
; CHECK-LE-NEXT: mul r1, r5, r1
; CHECK-LE-NEXT: bne .LBB2_2
; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: mul r1, r6, r1
; CHECK-BE-NEXT: bne .LBB2_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
;
; CHECK-BE-LABEL: and_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r5, r6, lr}
-; CHECK-BE-NEXT: push {r4, r5, r6, lr}
+; CHECK-BE-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB3_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-BE-NEXT: .LBB3_2: @ %for.body
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ldrsh lr, [r3, #2]!
-; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
-; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
-; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
-; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT: uxth.w lr, lr
-; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
+; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
+; CHECK-BE-NEXT: ldrsh.w r7, [r2, #2]
+; CHECK-BE-NEXT: uxth.w r6, lr
+; CHECK-BE-NEXT: smlabb r5, r5, lr, r12
+; CHECK-BE-NEXT: smlabb r12, r7, r4, r5
; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: mul r1, lr, r1
+; CHECK-BE-NEXT: mul r1, r6, r1
; CHECK-BE-NEXT: bne .LBB3_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-BE-NEXT: .LBB3_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
; CHECK-BE-NEXT: smlabb r4, r4, r1, r12
-; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
; CHECK-BE-NEXT: eor.w r6, r1, lr
-; CHECK-BE-NEXT: mul r1, r6, r1
+; CHECK-BE-NEXT: muls r1, r6, r1
+; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: lsl.w lr, r1, #16
; CHECK-BE-NEXT: bne .LBB4_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
-; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
; CHECK-REG-PRESSURE: bne .LBB0_1
for.body: