From 5e13867aa38a53e87ac373299c74e9489c9ea6a3 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 13 Jul 2019 15:43:00 +0000 Subject: [PATCH] [ARM] Add sign and zero extend patterns for MVE The vmovlb instructions can be used to sign or zero extend vector registers between types. This adds some patterns for them and relevant testing. The VBICIMM generation is also put behind a hasNEON check (as is already done for VORRIMM). Code originally by David Sherwood. Differential Revision: https://reviews.llvm.org/D64069 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366008 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 2 +- lib/Target/ARM/ARMInstrMVE.td | 17 ++++++ test/CodeGen/Thumb2/mve-sext.ll | 93 ++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/Thumb2/mve-sext.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e8526d1f31c..5e2de61e288 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -11180,7 +11180,7 @@ static SDValue PerformANDCombine(SDNode *N, APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - if (BVN && + if (BVN && Subtarget->hasNEON() && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VbicVT; diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td index dcee5d749bb..10ed876f484 100644 --- a/lib/Target/ARM/ARMInstrMVE.td +++ b/lib/Target/ARM/ARMInstrMVE.td @@ -1002,6 +1002,23 @@ defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>; defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>; defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16), + (MVE_VMOVLs16bh MQPR:$src)>; + def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8), + (MVE_VMOVLs8bh MQPR:$src)>; + 
def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8), + (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>; + + // zext_inreg 16 -> 32 + def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))), + (MVE_VMOVLu16bh MQPR:$src)>; + // zext_inreg 8 -> 16 + def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))), + (MVE_VMOVLu8bh MQPR:$src)>; +} + + class MVE_VSHLL_imm pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops), diff --git a/test/CodeGen/Thumb2/mve-sext.ll b/test/CodeGen/Thumb2/mve-sext.ll new file mode 100644 index 00000000000..9458fdc47e5 --- /dev/null +++ b/test/CodeGen/Thumb2/mve-sext.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) { +; CHECK-LABEL: sext_v8i8_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sext <8 x i8> %src to <8 x i16> + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @sext_v4i16_v4i32(<4 x i16> %src) { +; CHECK-LABEL: sext_v4i16_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sext <4 x i16> %src to <4 x i32> + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @sext_v4i8_v4i32(<4 x i8> %src) { +; CHECK-LABEL: sext_v4i8_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q0, q0 +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sext <4 x i8> %src to <4 x i32> + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) { +; CHECK-LABEL: zext_v8i8_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = zext <8 x i8> %src to <8 x i16> + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @zext_v4i16_v4i32(<4 x i16> %src) { +; CHECK-LABEL: zext_v4i16_v4i32: +; 
CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = zext <4 x i16> %src to <4 x i32> + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @zext_v4i8_v4i32(<4 x i8> %src) { +; CHECK-LABEL: zext_v4i8_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q1, #0xff +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext <4 x i8> %src to <4 x i32> + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) { +; CHECK-LABEL: trunc_v8i16_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = trunc <8 x i16> %src to <8 x i8> + ret <8 x i8> %0 +} + +define arm_aapcs_vfpcc <4 x i16> @trunc_v4i32_v4i16(<4 x i32> %src) { +; CHECK-LABEL: trunc_v4i32_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = trunc <4 x i32> %src to <4 x i16> + ret <4 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i8> @trunc_v4i32_v4i8(<4 x i32> %src) { +; CHECK-LABEL: trunc_v4i32_v4i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = trunc <4 x i32> %src to <4 x i8> + ret <4 x i8> %0 +} -- 2.50.1