From: David Green Date: Thu, 15 Aug 2019 12:54:47 +0000 (+0000) Subject: [ARM] Fix alignment checks for BE VLDRH X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2a9a8731ff819d276eeceb038dc4302f0f4457f1;p=llvm [ARM] Fix alignment checks for BE VLDRH We need to allow any alignment at least 2, not just exactly 2, so that the big endian loads and stores can be selected successfully. I've also added extra BE testing for the load and store tests. Thanks to Oliver for the report. Differential Revision: https://reviews.llvm.org/D66222 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368996 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td index cd670819fad..ce51322f913 100644 --- a/lib/Target/ARM/ARMInstrMVE.td +++ b/lib/Target/ARM/ARMInstrMVE.td @@ -4846,11 +4846,11 @@ def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), }]>; def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (pre_store node:$val, node:$ptr, node:$offset), [{ - return cast<StoreSDNode>(N)->getAlignment() == 2; + return cast<StoreSDNode>(N)->getAlignment() >= 2; }]>; def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (post_store node:$val, node:$ptr, node:$offset), [{ - return cast<StoreSDNode>(N)->getAlignment() == 2; + return cast<StoreSDNode>(N)->getAlignment() >= 2; }]>; let Predicates = [HasMVEInt, IsLE] in { diff --git a/test/CodeGen/Thumb2/mve-ldst-offset.ll b/test/CodeGen/Thumb2/mve-ldst-offset.ll index bff8b351bfe..5a0a6051867 100644 --- a/test/CodeGen/Thumb2/mve-ldst-offset.ll +++ b/test/CodeGen/Thumb2/mve-ldst-offset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc 
-mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: @@ -720,11 +721,18 @@ entry: } define i8* @ldrwi32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* @@ -735,11 +743,18 @@ entry: } define i8* @ldrhi16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* @@ -772,12 +787,19 @@ entry: ret i8* %x } -define i8* @ldrwf32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +define i8* @ldrf32_align1(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrw.32 q0, [r1] 
+; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x float>* @@ -787,12 +809,19 @@ entry: ret i8* %x } -define i8* @ldrwf16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +define i8* @ldrf16_align1(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x half>* @@ -802,6 +831,27 @@ entry: ret i8* %x } +define i8* @ldrh16_align8(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrh16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4] +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrh16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4] +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + @@ -1294,11 +1344,18 @@ entry: } define i8* @strwi32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strwi32_align1: +; CHECK-LE: @ %bb.0: @ 
%entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -1309,11 +1366,18 @@ entry: } define i8* @strhi16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1347,11 +1411,18 @@ entry: } define i8* @strf32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -1362,11 +1433,18 @@ entry: } define i8* @strf16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; 
CHECK-LE-LABEL: strf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -1375,3 +1453,24 @@ entry: store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %y } + +define i8* @strf16_align8(i8* %y, i8* %x) { +; CHECK-LE-LABEL: strf16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 16 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 8 + ret i8* %y +} diff --git a/test/CodeGen/Thumb2/mve-ldst-postinc.ll b/test/CodeGen/Thumb2/mve-ldst-postinc.ll index e97b7a51bcc..61afa727c62 100644 --- a/test/CodeGen/Thumb2/mve-ldst-postinc.ll +++ b/test/CodeGen/Thumb2/mve-ldst-postinc.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: @@ -17,11 +18,18 @@ entry: } define 
i8* @ldrwu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwu32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwu32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwu32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -291,11 +299,18 @@ entry: } define i8* @ldrhu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhu16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhu16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhu16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -708,11 +723,19 @@ entry: } define i8* @ldrwi32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -723,11 
+746,19 @@ entry: } define i8* @ldrhi16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -762,11 +793,19 @@ entry: } define i8* @ldrf32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -777,11 +816,19 @@ entry: } define i8* @ldrf16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] 
+; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -791,16 +838,43 @@ entry: ret i8* %z } +define i8* @ldrh16_align8(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrh16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r0], #4 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrh16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0], #4 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strw32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i32>* @@ -811,11 +885,18 @@ entry: } define i8* @strw32_3(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast 
i8* %x to <4 x i32>* @@ -826,11 +907,17 @@ entry: } define i8* @strw32_m4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_m4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #-4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_m4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #-4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_m4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0], #-4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 %0 = bitcast i8* %x to <4 x i32>* @@ -982,11 +1069,17 @@ entry: define i8* @strh16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i16>* @@ -997,11 +1090,18 @@ entry: } define i8* @strh16_3(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1012,11 +1112,17 @@ entry: } define i8* @strh16_2(i8* %y, i8* 
%x) { -; CHECK-LABEL: strh16_2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #2 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_2: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #2 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_2: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #2 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <8 x i16>* @@ -1244,11 +1350,17 @@ entry: } define i8* @strf32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x float>* @@ -1259,11 +1371,17 @@ entry: } define i8* @strf16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x half>* @@ -1274,11 +1392,19 @@ entry: } define i8* @strwi32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; 
CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -1289,11 +1415,19 @@ entry: } define i8* @strhi16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1328,11 +1462,19 @@ entry: } define i8* @strf32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -1343,11 +1485,19 @@ entry: } define i8* 
@strf16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -1356,3 +1506,24 @@ entry: store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %z } + +define i8* @strf16_align8(i8* %y, i8* %x) { +; CHECK-LE-LABEL: strf16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #16 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #16 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 16 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 8 + ret i8* %z +} diff --git a/test/CodeGen/Thumb2/mve-ldst-preinc.ll b/test/CodeGen/Thumb2/mve-ldst-preinc.ll index 0cbf55b5a3d..ca1731a23d3 100644 --- a/test/CodeGen/Thumb2/mve-ldst-preinc.ll +++ b/test/CodeGen/Thumb2/mve-ldst-preinc.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi 
-mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: @@ -17,11 +18,18 @@ entry: } define i8* @ldrwu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwu32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwu32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwu32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrw.u32 q0, [r0] +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* @@ -291,11 +299,18 @@ entry: } define i8* @ldrhu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhu16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhu16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhu16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrh.u16 q0, [r0] +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* @@ -708,11 +723,19 @@ entry: } define i8* @ldrwi32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! 
+; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* @@ -723,11 +746,19 @@ entry: } define i8* @ldrhi16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* @@ -762,11 +793,19 @@ entry: } define i8* @ldrf32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x float>* @@ -777,11 +816,19 @@ entry: } define i8* @ldrf16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! 
-; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x half>* @@ -791,16 +838,43 @@ entry: ret i8* %z } +define i8* @ldrh16_align8(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrh16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrh16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]! +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strw32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i32>* @@ -811,11 +885,18 @@ entry: } define i8* @strw32_3(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! 
-; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -826,11 +907,17 @@ entry: } define i8* @strw32_m4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_m4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #-4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_m4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #-4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_m4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0, #-4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 %0 = bitcast i8* %x to <4 x i32>* @@ -982,11 +1069,17 @@ entry: define i8* @strh16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i16>* @@ -997,11 +1090,18 @@ entry: } define i8* @strh16_3(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! 
-; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1012,11 +1112,17 @@ entry: } define i8* @strh16_2(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #2]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_2: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #2]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_2: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #2]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <8 x i16>* @@ -1244,11 +1350,17 @@ entry: } define i8* @strf32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x float>* @@ -1259,11 +1371,17 @@ entry: } define i8* @strf16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! 
-; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x half>* @@ -1274,11 +1392,19 @@ entry: } define i8* @strwi32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -1289,11 +1415,19 @@ entry: } define i8* @strhi16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 
+; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1327,11 +1461,19 @@ entry: } define i8* @strf32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -1342,11 +1484,19 @@ entry: } define i8* @strf16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 
+; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -1355,3 +1505,24 @@ entry: store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %z } + +define i8* @strf16_align8(i8* %y, i8* %x) { +; CHECK-LE-LABEL: strf16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #16]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]! +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 16 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 8 + ret i8* %z +}