From 2a9a8731ff819d276eeceb038dc4302f0f4457f1 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 15 Aug 2019 12:54:47 +0000 Subject: [PATCH] [ARM] Fix alignment checks for BE VLDRH We need to allow any alignment at least 2, not just exactly 2, so that the big endian loads and stores can be selected successfully. I've also added extra BE testing for the load and store tests. Thanks to Oliver for the report. Differential Revision: https://reviews.llvm.org/D66222 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368996 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrMVE.td | 4 +- test/CodeGen/Thumb2/mve-ldst-offset.ll | 185 ++++++++++--- test/CodeGen/Thumb2/mve-ldst-postinc.ll | 353 ++++++++++++++++++------ test/CodeGen/Thumb2/mve-ldst-preinc.ll | 353 ++++++++++++++++++------ 4 files changed, 668 insertions(+), 227 deletions(-) diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td index cd670819fad..ce51322f913 100644 --- a/lib/Target/ARM/ARMInstrMVE.td +++ b/lib/Target/ARM/ARMInstrMVE.td @@ -4846,11 +4846,11 @@ def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), }]>; def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (pre_store node:$val, node:$ptr, node:$offset), [{ - return cast<StoreSDNode>(N)->getAlignment() == 2; + return cast<StoreSDNode>(N)->getAlignment() >= 2; }]>; def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (post_store node:$val, node:$ptr, node:$offset), [{ - return cast<StoreSDNode>(N)->getAlignment() == 2; + return cast<StoreSDNode>(N)->getAlignment() >= 2; }]>; let Predicates = [HasMVEInt, IsLE] in { diff --git a/test/CodeGen/Thumb2/mve-ldst-offset.ll b/test/CodeGen/Thumb2/mve-ldst-offset.ll index bff8b351bfe..5a0a6051867 100644 --- a/test/CodeGen/Thumb2/mve-ldst-offset.ll +++ b/test/CodeGen/Thumb2/mve-ldst-offset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve 
-verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: @@ -720,11 +721,18 @@ entry: } define i8* @ldrwi32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* @@ -735,11 +743,18 @@ entry: } define i8* @ldrhi16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* @@ -772,12 +787,19 @@ entry: ret i8* %x } -define i8* @ldrwf32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrw.32 q0, 
[r1] -; CHECK-NEXT: bx lr +define i8* @ldrf32_align1(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x float>* @@ -787,12 +809,19 @@ entry: ret i8* %x } -define i8* @ldrwf16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +define i8* @ldrf16_align1(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x half>* @@ -802,6 +831,27 @@ entry: ret i8* %x } +define i8* @ldrh16_align8(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrh16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4] +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrh16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4] +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + @@ -1294,11 +1344,18 @@ entry: } define i8* @strwi32_align1(i8* %y, i8* %x) 
{ -; CHECK-LABEL: strwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -1309,11 +1366,18 @@ entry: } define i8* @strhi16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1347,11 +1411,18 @@ entry: } define i8* @strf32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -1362,11 +1433,18 
@@ entry: } define i8* @strf16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -1375,3 +1453,24 @@ entry: store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %y } + +define i8* @strf16_align8(i8* %y, i8* %x) { +; CHECK-LE-LABEL: strf16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 16 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 8 + ret i8* %y +} diff --git a/test/CodeGen/Thumb2/mve-ldst-postinc.ll b/test/CodeGen/Thumb2/mve-ldst-postinc.ll index e97b7a51bcc..61afa727c62 100644 --- a/test/CodeGen/Thumb2/mve-ldst-postinc.ll +++ b/test/CodeGen/Thumb2/mve-ldst-postinc.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc 
-mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: @@ -17,11 +18,18 @@ entry: } define i8* @ldrwu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwu32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwu32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwu32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -291,11 +299,18 @@ entry: } define i8* @ldrhu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhu16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhu16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhu16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -708,11 +723,19 @@ entry: } define i8* @ldrwi32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: 
vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -723,11 +746,19 @@ entry: } define i8* @ldrhi16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -762,11 +793,19 @@ entry: } define i8* @ldrf32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -777,11 +816,19 @@ entry: } define i8* @ldrf16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: 
vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -791,16 +838,43 @@ entry: ret i8* %z } +define i8* @ldrh16_align8(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrh16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r0], #4 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrh16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0], #4 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strw32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i32>* @@ -811,11 +885,18 @@ entry: } define i8* @strw32_3(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; 
CHECK-BE-LABEL: strw32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -826,11 +907,17 @@ entry: } define i8* @strw32_m4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_m4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #-4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_m4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #-4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_m4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0], #-4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 %0 = bitcast i8* %x to <4 x i32>* @@ -982,11 +1069,17 @@ entry: define i8* @strh16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i16>* @@ -997,11 +1090,18 @@ entry: } define i8* @strh16_3(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 
q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1012,11 +1112,17 @@ entry: } define i8* @strh16_2(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #2 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_2: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #2 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_2: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #2 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <8 x i16>* @@ -1244,11 +1350,17 @@ entry: } define i8* @strf32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x float>* @@ -1259,11 +1371,17 @@ entry: } define i8* @strf16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = 
getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x half>* @@ -1274,11 +1392,19 @@ entry: } define i8* @strwi32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -1289,11 +1415,19 @@ entry: } define i8* @strhi16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1328,11 +1462,19 @@ entry: } define i8* @strf32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; 
CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -1343,11 +1485,19 @@ entry: } define i8* @strf16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -1356,3 +1506,24 @@ entry: store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %z } + +define i8* @strf16_align8(i8* %y, i8* %x) { +; CHECK-LE-LABEL: strf16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #16 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #16 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 16 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 8 + ret i8* %z +} diff --git a/test/CodeGen/Thumb2/mve-ldst-preinc.ll b/test/CodeGen/Thumb2/mve-ldst-preinc.ll index 0cbf55b5a3d..ca1731a23d3 100644 --- a/test/CodeGen/Thumb2/mve-ldst-preinc.ll +++ b/test/CodeGen/Thumb2/mve-ldst-preinc.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi 
-mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: @@ -17,11 +18,18 @@ entry: } define i8* @ldrwu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwu32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwu32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwu32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrw.u32 q0, [r0] +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* @@ -291,11 +299,18 @@ entry: } define i8* @ldrhu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhu16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhu16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhu16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrh.u16 q0, [r0] +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* @@ -708,11 +723,19 @@ entry: } define i8* @ldrwi32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! 
-; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* @@ -723,11 +746,19 @@ entry: } define i8* @ldrhi16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* @@ -762,11 +793,19 @@ entry: } define i8* @ldrf32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! 
+; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x float>* @@ -777,11 +816,19 @@ entry: } define i8* @ldrf16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x half>* @@ -791,16 +838,43 @@ entry: ret i8* %z } +define i8* @ldrh16_align8(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrh16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrh16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]! +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strw32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! 
-; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i32>* @@ -811,11 +885,18 @@ entry: } define i8* @strw32_3(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -826,11 +907,17 @@ entry: } define i8* @strw32_m4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_m4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #-4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_m4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #-4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_m4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0, #-4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 %0 = bitcast i8* %x to <4 x i32>* @@ -982,11 +1069,17 @@ entry: define i8* @strh16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! 
-; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i16>* @@ -997,11 +1090,18 @@ entry: } define i8* @strh16_3(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1012,11 +1112,17 @@ entry: } define i8* @strh16_2(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #2]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_2: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #2]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_2: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #2]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <8 x i16>* @@ -1244,11 +1350,17 @@ entry: } define i8* @strf32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! 
-; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x float>* @@ -1259,11 +1371,17 @@ entry: } define i8* @strf16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x half>* @@ -1274,11 +1392,19 @@ entry: } define i8* @strwi32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -1289,11 +1415,19 @@ entry: } define i8* @strhi16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! 
-; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1327,11 +1461,19 @@ entry: } define i8* @strf32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -1342,11 +1484,19 @@ entry: } define i8* @strf16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 
+; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -1355,3 +1505,24 @@ entry: store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %z } + +define i8* @strf16_align8(i8* %y, i8* %x) { +; CHECK-LE-LABEL: strf16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #16]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]! +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 16 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 8 + ret i8* %z +} -- 2.50.1