return true;
}
+ // These are for truncating stores and narrowing loads. They are fine as long
+ // as the alignment is at least the size of the element being accessed.
+ if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
+ Alignment >= VT.getScalarSizeInBits() / 8) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+
if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
- Ty != MVT::v2f64 &&
- // These are for truncated stores
- Ty != MVT::v4i8 && Ty != MVT::v8i8 && Ty != MVT::v4i16)
+ Ty != MVT::v2f64)
return false;
if (Subtarget->isLittle()) {
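
For illustration, a minimal standalone sketch of the rule the new clause encodes (the helper name and layout are mine, not part of the patch): a narrowing or truncating vector access is treated as fast when the alignment covers a single element rather than the whole vector, so a v4i16 access with align 2 qualifies while the same access with align 1 does not.

#include <cassert>

// Hypothetical helper (not in the patch) mirroring the new clause above.
// ElementBits is the scalar size of the memory type: 8 for v4i8/v8i8,
// 16 for v4i16. The access counts as fast when the alignment covers one element.
static bool isFastNarrowingAccess(unsigned ElementBits, unsigned AlignInBytes) {
  return AlignInBytes >= ElementBits / 8;
}

int main() {
  assert(isFastNarrowingAccess(16, 2));  // v4i16 truncstore/extload, align 2
  assert(!isFastNarrowingAccess(16, 1)); // align 1: rejected, gets expanded
  assert(isFastNarrowingAccess(8, 1));   // byte elements need no extra alignment
  return 0;
}
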
// Widening/Narrowing Loads/Stores
+let MinAlignment = 2 in {
+ def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorevi16 node:$val, node:$ptr)>;
+}
+
let Predicates = [HasMVEInt] in {
- def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<1>:$addr),
- (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
- def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
- (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
- def : Pat<(truncstorevi16 (v4i32 MQPR:$val), t2addrmode_imm7<2>:$addr),
- (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<2>:$addr)>;
+ def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
+ (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
+}
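
The switch from t2addrmode_imm7<1>/<2> to <0>/<1> matters because the MVE 7-bit offset is scaled by the shift parameter; with the wrong scale an offset such as #3 cannot be folded into a byte access, which is why the old output needed a separate "adds rN, r0, #3" (removed in the tests below). A rough sketch of that constraint, with an illustrative helper name:

#include <cassert>

// Hypothetical helper (illustrative only): does Offset fit the immediate of
// t2addrmode_imm7<Shift>? The 7-bit field is scaled by 1 << Shift, so byte
// accesses (<0>) take any offset in [-127, 127], halfword accesses (<1>) only
// even offsets in [-254, 254], and word accesses (<2>) multiples of 4.
static bool fitsT2AddrModeImm7(int Offset, unsigned Shift) {
  int Scale = 1 << Shift;
  return Offset % Scale == 0 && Offset >= -127 * Scale && Offset <= 127 * Scale;
}

int main() {
  assert(fitsT2AddrModeImm7(3, 0));  // vstrb.32 q0, [r0, #3] is encodable
  assert(!fitsT2AddrModeImm7(3, 1)); // a <1> pattern cannot fold #3, so the old
                                     // patterns forced an explicit adds
  assert(fitsT2AddrModeImm7(2, 1));  // vstrh.32 q0, [r0, #2] is encodable
  return 0;
}
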
+
+let MinAlignment = 2 in {
+ def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>;
+ def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>;
+ def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>;
}
multiclass MVEExtLoad<string DestLanes, string DestElemBits,
string SrcElemBits, string SrcElemType,
- Operand am> {
+ string Align, Operand am> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("extloadvi" # SrcElemBits) am:$addr)),
+ (!cast<PatFrag>("extloadvi" # SrcElemBits # Align) am:$addr)),
(!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
am:$addr)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("zextloadvi" # SrcElemBits) am:$addr)),
+ (!cast<PatFrag>("zextloadvi" # SrcElemBits # Align) am:$addr)),
(!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
am:$addr)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("sextloadvi" # SrcElemBits) am:$addr)),
+ (!cast<PatFrag>("sextloadvi" # SrcElemBits # Align) am:$addr)),
(!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits)
am:$addr)>;
}
let Predicates = [HasMVEInt] in {
- defm : MVEExtLoad<"4", "32", "8", "B", t2addrmode_imm7<1>>;
- defm : MVEExtLoad<"8", "16", "8", "B", t2addrmode_imm7<1>>;
- defm : MVEExtLoad<"4", "32", "16", "H", t2addrmode_imm7<2>>;
+ defm : MVEExtLoad<"4", "32", "8", "B", "", t2addrmode_imm7<0>>;
+ defm : MVEExtLoad<"8", "16", "8", "B", "", t2addrmode_imm7<0>>;
+ defm : MVEExtLoad<"4", "32", "16", "H", "_align2", t2addrmode_imm7<1>>;
}
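
As a rough model of what the _align2 PatFrags buy (the helper and type names below are mine, not LLVM API): the MinAlignment = 2 wrapper keeps the single-instruction VLDRH patterns from matching under-aligned accesses, so an align-2 extending load still selects to one vldrh, while an align-1 load is left to be expanded through the stack, which is the split visible in the *_align1 tests further down.

#include <cstdio>

// Hypothetical model of the two-stage decision: the target hook (above) says
// the access is legal, and the MinAlignment = 2 PatFrag decides whether the
// single-instruction pattern may be used.
struct ExtLoad {
  unsigned SrcElemBits;   // e.g. 16 for sextloadvi16
  unsigned AlignInBytes;  // alignment on the IR load
};

static const char *selectExtLoad(const ExtLoad &L) {
  // Mirrors sextloadvi16_align2: only match when the alignment is at least 2.
  if (L.SrcElemBits == 16 && L.AlignInBytes >= 2)
    return "vldrh.s32 q0, [rN, #imm]";
  if (L.SrcElemBits == 8) // byte loads carry no alignment restriction
    return "vldrb.s32 q0, [rN, #imm]";
  return "expanded via the stack (see the *_align1 tests)";
}

int main() {
  std::printf("%s\n", selectExtLoad({16, 2})); // single vldrh.s32
  std::printf("%s\n", selectExtLoad({16, 1})); // expanded
  return 0;
}
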
define i8* @ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_2:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r2, r0, #2
-; CHECK-NEXT: vldrh.u32 q0, [r2]
+; CHECK-NEXT: vldrh.u32 q0, [r0, #2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_2:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r2, r0, #2
-; CHECK-NEXT: vldrh.s32 q0, [r2]
+; CHECK-NEXT: vldrh.s32 q0, [r0, #2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r2, r0, #3
-; CHECK-NEXT: vldrb.u32 q0, [r2]
+; CHECK-NEXT: vldrb.u32 q0, [r0, #3]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r2, r0, #3
-; CHECK-NEXT: vldrb.s32 q0, [r2]
+; CHECK-NEXT: vldrb.s32 q0, [r0, #3]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r2, r0, #3
-; CHECK-NEXT: vldrb.u16 q0, [r2]
+; CHECK-NEXT: vldrb.u16 q0, [r0, #3]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r2, r0, #3
-; CHECK-NEXT: vldrb.s16 q0, [r2]
+; CHECK-NEXT: vldrb.s16 q0, [r0, #3]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrhi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi32_align1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r2, r0, #3
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: ldr.w r3, [r0, #7]
+; CHECK-NEXT: ldr.w r2, [r0, #3]
+; CHECK-NEXT: strd r2, r3, [sp]
+; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
; CHECK-LABEL: strh32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
-; CHECK-NEXT: adds r1, r0, #2
-; CHECK-NEXT: vstrh.32 q0, [r1]
+; CHECK-NEXT: vstrh.32 q0, [r0, #2]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
; CHECK-LABEL: strb32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
-; CHECK-NEXT: adds r1, r0, #3
-; CHECK-NEXT: vstrb.32 q0, [r1]
+; CHECK-NEXT: vstrb.32 q0, [r0, #3]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
; CHECK-LABEL: strb16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
-; CHECK-NEXT: adds r1, r0, #3
-; CHECK-NEXT: vstrb.16 q0, [r1]
+; CHECK-NEXT: vstrb.16 q0, [r0, #3]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
define i8* @strhi32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strhi32_align1:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: adds r1, r0, #3
+; CHECK-NEXT: mov r1, sp
; CHECK-NEXT: vstrh.32 q0, [r1]
+; CHECK-NEXT: ldrd r1, r2, [sp]
+; CHECK-NEXT: str.w r1, [r0, #3]
+; CHECK-NEXT: str.w r2, [r0, #7]
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
define i8* @ldrhi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi32_align1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.s32 q0, [r0]
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: ldr r3, [r0, #4]
+; CHECK-NEXT: ldr r2, [r0]
; CHECK-NEXT: adds r0, #3
+; CHECK-NEXT: strd r2, r3, [sp]
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
define i8* @strhi32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strhi32_align1:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrh.32 q0, [r0]
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vstrh.32 q0, [r1]
+; CHECK-NEXT: ldrd r1, r2, [sp]
+; CHECK-NEXT: str r1, [r0]
+; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: adds r0, #3
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
define i8* @ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_2:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrh.u32 q0, [r0, #2]
; CHECK-NEXT: adds r0, #2
-; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_2:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrh.s32 q0, [r0, #2]
; CHECK-NEXT: adds r0, #2
-; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_3:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrb.u32 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
-; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_3:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrb.s32 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
-; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_3:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrb.u16 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
-; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_3:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrb.s16 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
-; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
define i8* @ldrhi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi32_align1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r0, #3
-; CHECK-NEXT: vldrh.s32 q0, [r0]
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: ldr r2, [r0, #3]!
+; CHECK-NEXT: str r2, [sp]
+; CHECK-NEXT: ldr r2, [r0, #4]
+; CHECK-NEXT: str r2, [sp, #4]
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
define i8* @strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_2:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.u32 q0, [r1]
-; CHECK-NEXT: vstrh.32 q0, [r0]
+; CHECK-NEXT: vstrh.32 q0, [r0, #2]
+; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
define i8* @strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u32 q0, [r1]
-; CHECK-NEXT: vstrb.32 q0, [r0]
+; CHECK-NEXT: vstrb.32 q0, [r0, #3]
+; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
define i8* @strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u16 q0, [r1]
-; CHECK-NEXT: vstrb.16 q0, [r0]
+; CHECK-NEXT: vstrb.16 q0, [r0, #3]
+; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
define i8* @strhi32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strhi32_align1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adds r0, #3
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrh.32 q0, [r0]
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vstrh.32 q0, [r1]
+; CHECK-NEXT: ldrd r1, r2, [sp]
+; CHECK-NEXT: str r1, [r0, #3]!
+; CHECK-NEXT: str r2, [r0, #4]
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
define void @foo_int8_int32(<4 x i8>* %dest, <4 x i32>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int8_int32:
ret void
}
-
define void @foo_int16_int32(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int16_int32:
; CHECK: @ %bb.0: @ %entry
ret void
}
-
define void @foo_int8_int16(<8 x i8>* %dest, <8 x i16>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int8_int16:
; CHECK: @ %bb.0: @ %entry
ret void
}
-
define void @foo_int32_int8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int8:
; CHECK: @ %bb.0: @ %entry
ret void
}
-
define void @foo_int16_int8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int16_int8:
; CHECK: @ %bb.0: @ %entry
ret void
}
-
define void @foo_int32_int16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int16:
; CHECK: @ %bb.0: @ %entry
ret void
}
-
define void @foo_uint32_uint8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint8:
; CHECK: @ %bb.0: @ %entry
ret void
}
-
define void @foo_uint16_uint8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint16_uint8:
; CHECK: @ %bb.0: @ %entry
ret void
}
-
define void @foo_uint32_uint16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint16:
; CHECK: @ %bb.0: @ %entry
store <4 x i32> %0, <4 x i32>* %dest, align 4
ret void
}
+
+define void @foo_int16_int32_align1(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
+; CHECK-LABEL: foo_int16_int32_align1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: vldrw.u32 q0, [r1]
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vstrh.32 q0, [r1]
+; CHECK-NEXT: ldrd r1, r2, [sp]
+; CHECK-NEXT: str r1, [r0]
+; CHECK-NEXT: str r2, [r0, #4]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: bx lr
+entry:
+ %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
+ %0 = trunc <4 x i32> %wide.load to <4 x i16>
+ store <4 x i16> %0, <4 x i16>* %dest, align 1
+ ret void
+}
+
+define void @foo_int32_int16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
+; CHECK-LABEL: foo_int32_int16_align1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: ldr r2, [r1]
+; CHECK-NEXT: ldr r1, [r1, #4]
+; CHECK-NEXT: strd r2, r1, [sp]
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vldrh.s32 q0, [r1]
+; CHECK-NEXT: vstrw.32 q0, [r0]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: bx lr
+entry:
+ %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
+ %0 = sext <4 x i16> %wide.load to <4 x i32>
+ store <4 x i32> %0, <4 x i32>* %dest, align 4
+ ret void
+}
+
+define void @foo_uint32_uint16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
+; CHECK-LABEL: foo_uint32_uint16_align1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: ldr r2, [r1]
+; CHECK-NEXT: ldr r1, [r1, #4]
+; CHECK-NEXT: strd r2, r1, [sp]
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vldrh.u32 q0, [r1]
+; CHECK-NEXT: vstrw.32 q0, [r0]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: bx lr
+entry:
+ %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
+ %0 = zext <4 x i16> %wide.load to <4 x i32>
+ store <4 x i32> %0, <4 x i32>* %dest, align 4
+ ret void
+}