From f755ea8045b153a34b676e8c44fa509b8012d47a Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 25 Jun 2019 12:04:31 +0000 Subject: [PATCH] [ARM] MVE VPT Blocks A minor iteration on the MVE VPT Block pass to enable more efficient VPT Block code generation: consecutive VPT predicated statements, predicated on the same condition, will be placed within the same VPT Block. This essentially is also an exercise to write some more tests for the next step, which should be more generic also merging instructions when they are not consecutive. Differential Revision: https://reviews.llvm.org/D63711 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364298 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Thumb2ITBlockPass.cpp | 16 +++-- test/CodeGen/ARM/mve-vpt-block2.mir | 77 ++++++++++++++++++++++++ test/CodeGen/ARM/mve-vpt-block3.mir | 82 +++++++++++++++++++++++++ test/CodeGen/ARM/mve-vpt-block4.mir | 90 ++++++++++++++++++++++++++++ test/CodeGen/ARM/mve-vpt-block5.mir | 88 +++++++++++++++++++++++++++ test/CodeGen/ARM/mve-vpt-block6.mir | 84 ++++++++++++++++++++++++++ 6 files changed, 433 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/ARM/mve-vpt-block2.mir create mode 100644 test/CodeGen/ARM/mve-vpt-block3.mir create mode 100644 test/CodeGen/ARM/mve-vpt-block4.mir create mode 100644 test/CodeGen/ARM/mve-vpt-block5.mir create mode 100644 test/CodeGen/ARM/mve-vpt-block6.mir diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 9c7cbf8d7f9..3143eb9840e 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -388,14 +388,22 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { MachineInstrBuilder MIBuilder = BuildMI(Block, MBIter, dl, TII->get(ARM::MVE_VPST)); - MachineInstr *LastMI = MI; - MachineBasicBlock::iterator InsertPos = MIBuilder.getInstr(); - // The mask value for the VPST instruction is T = 0b1000 = 8 MIBuilder.addImm(VPTMaskValue::T); - finalizeBundle(Block, InsertPos.getInstrIterator(), + MachineBasicBlock::iterator VPSTInsertPos = MIBuilder.getInstr(); + int VPTInstCnt = 1; + ARMVCC::VPTCodes NextPred; + + do { + ++MBIter; + NextPred = getVPTInstrPredicate(*MBIter, PredReg); + } while (NextPred != ARMVCC::None && NextPred == Pred && ++VPTInstCnt < 4); + + MachineInstr *LastMI = &*MBIter; + finalizeBundle(Block, VPSTInsertPos.getInstrIterator(), ++LastMI->getIterator()); + Modified = true; LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump();); diff --git a/test/CodeGen/ARM/mve-vpt-block2.mir b/test/CodeGen/ARM/mve-vpt-block2.mir new file mode 100644 index 00000000000..4c43afe5b1d --- /dev/null +++ b/test/CodeGen/ARM/mve-vpt-block2.mir @@ -0,0 +1,77 @@ +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define hidden arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32_v2(<4 x float> %inactive1, <4 x float> %inactive2, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { + entry: + %conv.i = zext i16 %p to i32 + %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2 + %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive2, <4 x float> %0, <4 x float> %b, i32 %conv.i) #2 + ret <4 x float> %1 + } + + declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1 + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + + +... +--- +name: test_vminnmq_m_f32_v2 +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } + - { reg: '$q3', virtual-reg: '' } + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +body: | + bb.0.entry: + liveins: $q0, $q1, $q2, $q3, $r0 + + ; CHECK: MVE_VPST 8, implicit-def $p0 + ; CHECK-NEXT: renamable $q0 = nnan ninf nsz MVE_VMINNMf32 + ; CHECK-NEXT: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 + + $vpr = VMSR_P0 killed $r0, 14, $noreg + renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, killed renamable $q0 + renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, killed renamable $q3, 1, killed renamable $vpr, killed renamable $q1 + $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 + tBX_RET 14, $noreg, implicit $q0 + +... diff --git a/test/CodeGen/ARM/mve-vpt-block3.mir b/test/CodeGen/ARM/mve-vpt-block3.mir new file mode 100644 index 00000000000..a10e5ea2493 --- /dev/null +++ b/test/CodeGen/ARM/mve-vpt-block3.mir @@ -0,0 +1,82 @@ +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define hidden arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32_v2(<4 x float> %inactive1, <4 x float> %inactive2, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { + entry: + %conv.i = zext i16 %p to i32 + %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2 + %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %0, <4 x float> %0, i32 %conv.i) #2 + %2 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %1, <4 x float> %b, i32 %conv.i) #2 + %3 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive2, <4 x float> %2, <4 x float> %b, i32 %conv.i) #2 + ret <4 x float> %3 + } + + declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1 + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + +... +--- +name: test_vminnmq_m_f32_v2 +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } + - { reg: '$q3', virtual-reg: '' } + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +body: | + bb.0.entry: + liveins: $q0, $q1, $q2, $q3, $r0 + + ; CHECK: MVE_VPST 8, implicit-def $p0 + ; CHECK-NEXT: renamable $q2 = nnan ninf nsz MVE_VMINNMf32 + ; CHECK-NEXT: renamable $q2 = nnan ninf nsz MVE_VMINNMf32 + ; CHECK-NEXT: renamable $q0 = nnan ninf nsz MVE_VMINNMf32 + ; CHECK-NEXT: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 + + $vpr = VMSR_P0 killed $r0, 14, $noreg + renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, undef renamable $q2 + renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q2, 1, renamable $vpr, undef renamable $q2 + renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, killed renamable $q0 + renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, killed renamable $q3, 1, killed renamable $vpr, killed renamable $q1 + $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 + tBX_RET 14, $noreg, implicit $q0 + +... diff --git a/test/CodeGen/ARM/mve-vpt-block4.mir b/test/CodeGen/ARM/mve-vpt-block4.mir new file mode 100644 index 00000000000..2170ed27746 --- /dev/null +++ b/test/CodeGen/ARM/mve-vpt-block4.mir @@ -0,0 +1,90 @@ +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define hidden arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32_v2(<4 x float> %inactive1, <4 x float> %inactive2, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { + entry: + %conv.i = zext i16 %p to i32 + %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2 + %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %0, <4 x float> %0, i32 %conv.i) #2 + %2 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %1, <4 x float> %b, i32 %conv.i) #2 + %3 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %2, <4 x float> %b, i32 %conv.i) #2 + %4 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive2, <4 x float> %3, <4 x float> %b, i32 %conv.i) #2 + ret <4 x float> %4 + } + + declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1 + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + +... +--- +name: test_vminnmq_m_f32_v2 +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } + - { reg: '$q3', virtual-reg: '' } + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +body: | + bb.0.entry: + liveins: $q0, $q1, $q2, $q3, $r0 + + ; CHECK: MVE_VPST 8, implicit-def $p0 + ; CHECK-NEXT: renamable $q2 = nnan ninf nsz MVE_VMINNMf32 + ; CHECK-NEXT: renamable $q2 = nnan ninf nsz MVE_VMINNMf32 + ; CHECK-NEXT: renamable $q0 = nnan ninf nsz MVE_VMINNMf32 + ; CHECK-NEXT: renamable $q0 = nnan ninf nsz MVE_VMINNMf32 + ; CHECK-NEXT: } + ; CHECK-NEXT: BUNDLE {{.*}} { + ; CHECK-NEXT: MVE_VPST 8, implicit-def $p0 + ; CHECK-NEXT: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 + ; CHECK-NEXT: $q0 = MVE_VORR + ; CHECK-NEXT: } + + $vpr = VMSR_P0 killed $r0, 14, $noreg + renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, undef renamable $q2 + renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q2, 1, renamable $vpr, undef renamable $q2 + renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, killed renamable $q0 + renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, renamable $q3, 1, renamable $vpr, undef renamable $q0 + renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, killed renamable $q3, 1, killed renamable $vpr, killed renamable $q1 + $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 + tBX_RET 14, $noreg, implicit $q0 + +... diff --git a/test/CodeGen/ARM/mve-vpt-block5.mir b/test/CodeGen/ARM/mve-vpt-block5.mir new file mode 100644 index 00000000000..8ef35b7a823 --- /dev/null +++ b/test/CodeGen/ARM/mve-vpt-block5.mir @@ -0,0 +1,88 @@ +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define hidden arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32_v2(<4 x float> %inactive1, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { + entry: + %conv.i = zext i16 %p to i32 + %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2 + %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %0, <4 x float> %0, i32 %conv.i) #2 + %2 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %1, <4 x float> %b, i32 %conv.i) #2 + %3 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %2, <4 x float> %b, i32 %conv.i) #2 + ret <4 x float> %3 + } + + declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1 + + attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + +... +--- +name: test_vminnmq_m_f32_v2 +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +body: | + bb.0.entry: + liveins: $q0, $q1, $q2, $r0 + + ; CHECK: BUNDLE {{.*}} { + ; CHECK-NEXT: MVE_VPST 8, implicit-def $p0 + ; CHECK-NEXT: renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, renamable $vpr, killed renamable $q3 + ; CHECK-NEXT: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q3, internal renamable $q3, 1, renamable $vpr, undef renamable $q1 + ; CHECK-NEXT: $q3 = MVE_VORR $q0, $q0, 0, $noreg, internal undef $q3 + ; CHECK-NEXT: } + ; CHECK-NEXT: BUNDLE {{.*}} { + ; CHECK-NEXT: MVE_VPST 8, implicit-def $p0 + ; CHECK-NEXT: renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, renamable $vpr, killed renamable $q3 + ; CHECK-NEXT: renamable $q0 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q3, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0 + ; CHECK-NEXT: tBX_RET 14, $noreg, implicit internal $q0 + ; CHECK-NEXT: } + + $vpr = VMSR_P0 killed $r0, 14, $noreg + $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3 + renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q3, renamable $q3, 1, renamable $vpr, undef renamable $q1 + $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3 + renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q3, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0 + tBX_RET 14, $noreg, implicit $q0 + +... diff --git a/test/CodeGen/ARM/mve-vpt-block6.mir b/test/CodeGen/ARM/mve-vpt-block6.mir new file mode 100644 index 00000000000..7060cdca3e3 --- /dev/null +++ b/test/CodeGen/ARM/mve-vpt-block6.mir @@ -0,0 +1,84 @@ +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define hidden arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32_v2(<4 x float> %inactive1, <4 x float> %a, <4 x float> %b, i16 zeroext %p1, i16 zeroext %p2) local_unnamed_addr #0 { + entry: + %conv.i = zext i16 %p1 to i32 + %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2 + %conv.i5 = zext i16 %p2 to i32 + %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %0, <4 x float> %b, i32 %conv.i5) #2 + ret <4 x float> %1 + } + + declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1 + + attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + +... +--- +name: test_vminnmq_m_f32_v2 +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +body: | + bb.0.entry: + liveins: $q0, $q1, $q2, $r0, $r1 + + ; CHECK: BUNDLE {{.*}} { + ; CHECK-NEXT: MVE_VPST 8, implicit-def $p0 + ; CHECK-NEXT: renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + ; CHECK-NEXT: $vpr = VMSR_P0 killed $r1, 14, $noreg + ; CHECK-NEXT: } + ; CHECK-NEXT: BUNDLE {{.*}} { + ; CHECK-NEXT: MVE_VPST 8, implicit-def $p0 + ; CHECK-NEXT: renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q3, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0 + ; CHECK-NEXT: tBX_RET 14, $noreg, implicit internal $q0 + ; CHECK-NEXT: } + + $vpr = VMSR_P0 killed $r0, 14, $noreg + $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3 + renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $vpr = VMSR_P0 killed $r1, 14, $noreg + renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q3, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0 + tBX_RET 14, $noreg, implicit $q0 + +... -- 2.50.1