From: David Green Date: Thu, 8 Aug 2019 05:58:48 +0000 (+0000) Subject: [ARM] Rejig MVE load store tests. NFC X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=34cd871096da81823efa76beb97d51f4dfdc2349;p=llvm [ARM] Rejig MVE load store tests. NFC This adjusts the load/store tests for better testing of alignments. It also adds some extra alignment 1 tests, useful for future commits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368255 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/Thumb2/mve-ldst-offset.ll b/test/CodeGen/Thumb2/mve-ldst-offset.ll index 3ad88ff48c0..d7f60d4328b 100644 --- a/test/CodeGen/Thumb2/mve-ldst-offset.ll +++ b/test/CodeGen/Thumb2/mve-ldst-offset.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s -define i8* @post_ldrwu32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_4: +define i8* @ldrwu32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0, #4] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -10,14 +10,14 @@ define i8* @post_ldrwu32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %x } -define i8* @post_ldrwu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_3: +define i8* @ldrwu32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r2] @@ -26,14 +26,14 @@ define i8* @post_ldrwu32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %x } -define i8* @post_ldrwu32_m4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_m4: +define i8* @ldrwu32_m4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0, #-4] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -41,14 +41,14 @@ define i8* @post_ldrwu32_m4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 -4 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %x } -define i8* @post_ldrwu32_508(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_508: +define i8* @ldrwu32_508(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r2] @@ -57,14 +57,14 @@ define i8* @post_ldrwu32_508(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 508 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %x } -define i8* @post_ldrwu32_512(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_512: +define i8* @ldrwu32_512(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r2] @@ -73,14 +73,14 @@ define i8* @post_ldrwu32_512(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 512 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %x } -define i8* @post_ldrwu32_m508(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_m508: +define i8* @ldrwu32_m508(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_m508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: sub.w r2, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r2] @@ -89,14 +89,14 @@ define i8* @post_ldrwu32_m508(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 -508 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %x } -define i8* @post_ldrwu32_m512(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_m512: +define i8* @ldrwu32_m512(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_m512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: sub.w r2, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r2] @@ -105,15 +105,15 @@ define i8* @post_ldrwu32_m512(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 -512 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %x } -define i8* @post_ldrhu32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_4: +define i8* @ldrhu32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r0, #4] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -121,15 +121,15 @@ define i8* @post_ldrhu32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_3: +define i8* @ldrhu32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #3 ; CHECK-NEXT: vldrh.u32 q0, [r2] @@ -138,15 +138,15 @@ define i8* @post_ldrhu32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhu32_2(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_2: +define i8* @ldrhu32_2(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #2 ; CHECK-NEXT: vldrh.u32 q0, [r2] @@ -155,15 +155,15 @@ define i8* @post_ldrhu32_2(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 2 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhu32_254(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_254: +define i8* @ldrhu32_254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #254 ; CHECK-NEXT: vldrh.u32 q0, [r2] @@ -172,15 +172,15 @@ define i8* @post_ldrhu32_254(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 254 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhu32_256(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_256: +define i8* @ldrhu32_256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #256 ; CHECK-NEXT: vldrh.u32 q0, [r2] @@ -189,16 +189,16 @@ define i8* @post_ldrhu32_256(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 256 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhs32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_4: +define i8* @ldrhs32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.s32 q0, [r0, #4] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -206,15 +206,15 @@ define i8* @post_ldrhs32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhs32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_3: +define i8* @ldrhs32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #3 ; CHECK-NEXT: vldrh.s32 q0, [r2] @@ -223,15 +223,15 @@ define i8* @post_ldrhs32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhs32_2(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_2: +define i8* @ldrhs32_2(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #2 ; CHECK-NEXT: vldrh.s32 q0, [r2] @@ -240,15 +240,15 @@ define i8* @post_ldrhs32_2(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 2 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhs32_254(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_254: +define i8* @ldrhs32_254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #254 ; CHECK-NEXT: vldrh.s32 q0, [r2] @@ -257,15 +257,15 @@ define i8* @post_ldrhs32_254(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 254 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhs32_256(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_256: +define i8* @ldrhs32_256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #256 ; CHECK-NEXT: vldrh.s32 q0, [r2] @@ -274,96 +274,95 @@ define i8* @post_ldrhs32_256(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 256 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrhu16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_4: +define i8* @ldrhu16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %x } -define i8* @post_ldrhu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_3: +define i8* @ldrhu16_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #3 -; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %x } -define i8* @post_ldrhu16_2(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_2: +define i8* @ldrhu16_2(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r2, r0, #2 -; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r0, #2] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 2 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %x } -define i8* @post_ldrhu16_254(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_254: +define i8* @ldrhu16_254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #254 -; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 254 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %x } -define i8* @post_ldrhu16_256(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_256: +define i8* @ldrhu16_256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #256 -; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 256 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %x } -define i8* @post_ldrbu32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_4: +define i8* @ldrbu32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r0, #4] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -371,15 +370,15 @@ define i8* @post_ldrbu32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrbu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_3: +define i8* @ldrbu32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #3 ; CHECK-NEXT: vldrb.u32 q0, [r2] @@ -388,15 +387,15 @@ define i8* @post_ldrbu32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrbu32_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_127: +define i8* @ldrbu32_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #127 ; CHECK-NEXT: vldrb.u32 q0, [r2] @@ -405,15 +404,15 @@ define i8* @post_ldrbu32_127(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrbu32_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_128: +define i8* @ldrbu32_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #128 ; CHECK-NEXT: vldrb.u32 q0, [r2] @@ -422,16 +421,16 @@ define i8* @post_ldrbu32_128(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrbs32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_4: +define i8* @ldrbs32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s32 q0, [r0, #4] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -439,15 +438,15 @@ define i8* @post_ldrbs32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrbs32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_3: +define i8* @ldrbs32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #3 ; CHECK-NEXT: vldrb.s32 q0, [r2] @@ -456,15 +455,15 @@ define i8* @post_ldrbs32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrbs32_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_127: +define i8* @ldrbs32_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #127 ; CHECK-NEXT: vldrb.s32 q0, [r2] @@ -473,15 +472,15 @@ define i8* @post_ldrbs32_127(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrbs32_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_128: +define i8* @ldrbs32_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #128 ; CHECK-NEXT: vldrb.s32 q0, [r2] @@ -490,214 +489,214 @@ define i8* @post_ldrbs32_128(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %x } -define i8* @post_ldrbu16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_4: +define i8* @ldrbu16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r0, #4] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %x } -define i8* @post_ldrbu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_3: +define i8* @ldrbu16_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #3 ; CHECK-NEXT: vldrb.u16 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %x } -define i8* @post_ldrbu16_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_127: +define i8* @ldrbu16_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #127 ; CHECK-NEXT: vldrb.u16 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %x } -define i8* @post_ldrbu16_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_128: +define i8* @ldrbu16_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #128 ; CHECK-NEXT: vldrb.u16 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %x } -define i8* @post_ldrbs16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_4: +define i8* @ldrbs16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s16 q0, [r0, #4] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %x } -define i8* @post_ldrbs16_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_3: +define i8* @ldrbs16_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r2, r0, #3 ; CHECK-NEXT: vldrb.s16 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %x } -define i8* @post_ldrbs16_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_127: +define i8* @ldrbs16_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #127 ; CHECK-NEXT: vldrb.s16 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %x } -define i8* @post_ldrbs16_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_128: +define i8* @ldrbs16_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #128 ; CHECK-NEXT: vldrb.s16 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %x } -define i8* @post_ldrbu8_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_4: +define i8* @ldrbu8_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrb.u8 q0, [r0, #4] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %x } -define i8* @post_ldrbu8_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_3: +define i8* @ldrbu8_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r2, r0, #3 -; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %x } -define i8* @post_ldrbu8_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_127: +define i8* @ldrbu8_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r2, r0, #127 -; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %x } -define i8* @post_ldrbu8_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_128: +define i8* @ldrbu8_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #128] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: add.w r2, r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %x } -define i8* @post_ldrwf32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwf32_4: +define i8* @ldrwf32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0, #4] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -705,24 +704,101 @@ define i8* @post_ldrwf32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x float>* - %1 = load <4 x float>, <4 x float>* %0, align 8 + %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = bitcast i8* %y to <4 x float>* - store <4 x float> %1, <4 x float>* %2, align 8 + store <4 x float> %1, <4 x float>* %2, align 4 ret i8* %x } -define i8* @post_ldrwf16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwf16_4: +define i8* @ldrwf16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <8 x half>* - %1 = load <8 x half>, <8 x half>* %0, align 8 + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrwi32_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 1 + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrhi16_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhi16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 1 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhi32_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r2, r0, #3 +; CHECK-NEXT: vldrh.s32 q0, [r2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 1 + %2 = bitcast i8* %y to <4 x i32>* + %3 = sext <4 x i16> %1 to <4 x i32> + store <4 x i32> %3, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwf32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 1 + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf16_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwf16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x half>* + %1 = load <8 x half>, <8 x half>* %0, align 1 %2 = bitcast i8* %y to <8 x half>* - store <8 x half> %1, <8 x half>* %2, align 8 + store <8 x half> %1, <8 x half>* %2, align 2 ret i8* %x } @@ -730,8 +806,8 @@ entry: -define i8* @post_strw32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_4: +define i8* @strw32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0, #4] @@ -739,14 +815,14 @@ define i8* @post_strw32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %y } -define i8* @post_strw32_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_3: +define i8* @strw32_3(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 @@ -755,14 +831,14 @@ define i8* @post_strw32_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %y } -define i8* @post_strw32_m4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_m4: +define i8* @strw32_m4(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0, #-4] @@ -770,14 +846,14 @@ define i8* @post_strw32_m4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %y } -define i8* @post_strw32_508(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_508: +define i8* @strw32_508(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #508 @@ -786,14 +862,14 @@ define i8* @post_strw32_508(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 508 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %y } -define i8* @post_strw32_512(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_512: +define i8* @strw32_512(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #512 @@ -802,14 +878,14 @@ define i8* @post_strw32_512(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 512 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %y } -define i8* @post_strw32_m508(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_m508: +define i8* @strw32_m508(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_m508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #508 @@ -818,14 +894,14 @@ define i8* @post_strw32_m508(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %y } -define i8* @post_strw32_m512(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_m512: +define i8* @strw32_m512(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_m512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #512 @@ -834,15 +910,15 @@ define i8* @post_strw32_m512(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 -512 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %y } -define i8* @post_strh32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_4: +define i8* @strh32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vstrh.32 q0, [r0, #4] @@ -850,14 +926,14 @@ define i8* @post_strh32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strh32_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_3: +define i8* @strh32_3(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 @@ -866,14 +942,14 @@ define i8* @post_strh32_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strh32_2(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_2: +define i8* @strh32_2(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: adds r1, r0, #2 @@ -882,14 +958,14 @@ define i8* @post_strh32_2(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strh32_254(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_254: +define i8* @strh32_254(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #254 @@ -898,14 +974,14 @@ define i8* @post_strh32_254(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 254 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strh32_256(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_256: +define i8* @strh32_256(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #256 @@ -914,95 +990,94 @@ define i8* @post_strh32_256(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 256 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strh16_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_4: +define i8* @strh16_4(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0, #4] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strh16_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_3: +define i8* @strh16_3(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strh16_2(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_2: +define i8* @strh16_2(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: adds r1, r0, #2 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0, #2] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strh16_254(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_254: +define i8* @strh16_254(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #254 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strh16_256(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_256: +define i8* @strh16_256(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #256 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 256 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %y } -define i8* @post_strb32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_4: +define i8* @strb32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vstrb.32 q0, [r0, #4] @@ -1010,14 +1085,14 @@ define i8* @post_strb32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %z to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb32_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_3: +define i8* @strb32_3(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 @@ -1026,14 +1101,14 @@ define i8* @post_strb32_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %z to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb32_127(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_127: +define i8* @strb32_127(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #127 @@ -1042,14 +1117,14 @@ define i8* @post_strb32_127(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 127 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %z to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb32_128(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_128: +define i8* @strb32_128(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #128 @@ -1058,15 +1133,15 @@ define i8* @post_strb32_128(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 128 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %z to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb16_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_4: +define i8* @strb16_4(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vstrb.16 q0, [r0, #4] @@ -1074,14 +1149,14 @@ define i8* @post_strb16_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %z to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb16_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_3: +define i8* @strb16_3(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 @@ -1090,14 +1165,14 @@ define i8* @post_strb16_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %z to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb16_127(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_127: +define i8* @strb16_127(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #127 @@ -1106,14 +1181,14 @@ define i8* @post_strb16_127(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 127 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %z to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb16_128(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_128: +define i8* @strb16_128(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #128 @@ -1122,77 +1197,77 @@ define i8* @post_strb16_128(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 128 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %z to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb8_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_4: +define i8* @strb8_4(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #4] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %z to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb8_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_3: +define i8* @strb8_3(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: adds r1, r0, #3 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %z to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb8_127(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_127: +define i8* @strb8_127(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #127 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %z to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strb8_128(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_128: +define i8* @strb8_128(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #128] +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #128 +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 128 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %z to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %y } -define i8* @post_strf32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strf32_4: +define i8* @strf32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0, #4] @@ -1200,23 +1275,100 @@ define i8* @post_strf32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x float>* - %1 = load <4 x float>, <4 x float>* %0, align 8 + %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = bitcast i8* %z to <4 x float>* - store <4 x float> %1, <4 x float>* %2, align 8 + store <4 x float> %1, <4 x float>* %2, align 4 ret i8* %y } -define i8* @post_strf16_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strf16_4: +define i8* @strf16_4(i8* %y, i8* %x) { +; CHECK-LABEL: strf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0, #4] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x half>* - %1 = load <8 x half>, <8 x half>* %0, align 8 + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %y +} + +define i8* @strwi32_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strwi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 1 + ret i8* %y +} + +define i8* @strhi16_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strhi16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 1 + ret i8* %y +} + +define i8* @strhi32_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strhi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: adds r1, r0, #3 +; CHECK-NEXT: vstrh.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i16>* + %3 = trunc <4 x i32> %1 to <4 x i16> + store <4 x i16> %3, <4 x i16>* %2, align 1 + ret i8* %y +} + +define i8* @strf32_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strf32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 1 + ret i8* %y +} + +define i8* @strf16_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strf16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %1 = load <8 x half>, <8 x half>* %0, align 2 %2 = bitcast i8* %z to <8 x half>* - store <8 x half> %1, <8 x half>* %2, align 8 + store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %y } diff --git a/test/CodeGen/Thumb2/mve-ldst-postinc.ll b/test/CodeGen/Thumb2/mve-ldst-postinc.ll index e979711f39a..a9b27bc23c8 100644 --- a/test/CodeGen/Thumb2/mve-ldst-postinc.ll +++ b/test/CodeGen/Thumb2/mve-ldst-postinc.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s -define i8* @post_ldrwu32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_4: +define i8* @ldrwu32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: adds r0, #4 @@ -11,14 +11,14 @@ define i8* @post_ldrwu32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_3: +define i8* @ldrwu32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -27,14 +27,14 @@ define i8* @post_ldrwu32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_m4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_m4: +define i8* @ldrwu32_m4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: subs r0, #4 @@ -43,14 +43,14 @@ define i8* @post_ldrwu32_m4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 -4 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_508(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_508: +define i8* @ldrwu32_508(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #508 @@ -59,14 +59,14 @@ define i8* @post_ldrwu32_508(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 508 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_512(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_512: +define i8* @ldrwu32_512(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #512 @@ -75,14 +75,14 @@ define i8* @post_ldrwu32_512(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 512 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_m508(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_m508: +define i8* @ldrwu32_m508(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_m508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: sub.w r0, r0, #508 @@ -91,14 +91,14 @@ define i8* @post_ldrwu32_m508(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 -508 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_m512(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_m512: +define i8* @ldrwu32_m512(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_m512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: sub.w r0, r0, #512 @@ -107,15 +107,15 @@ define i8* @post_ldrwu32_m512(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 -512 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrhu32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_4: +define i8* @ldrhu32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r0] ; CHECK-NEXT: adds r0, #4 @@ -124,15 +124,15 @@ define i8* @post_ldrhu32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_3: +define i8* @ldrhu32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -141,15 +141,15 @@ define i8* @post_ldrhu32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu32_2(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_2: +define i8* @ldrhu32_2(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r0] ; CHECK-NEXT: adds r0, #2 @@ -158,15 +158,15 @@ define i8* @post_ldrhu32_2(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 2 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu32_254(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_254: +define i8* @ldrhu32_254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r0] ; CHECK-NEXT: adds r0, #254 @@ -175,15 +175,15 @@ define i8* @post_ldrhu32_254(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 254 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu32_256(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_256: +define i8* @ldrhu32_256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #256 @@ -192,16 +192,16 @@ define i8* @post_ldrhu32_256(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 256 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_4: +define i8* @ldrhs32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.s32 q0, [r0] ; CHECK-NEXT: adds r0, #4 @@ -210,15 +210,15 @@ define i8* @post_ldrhs32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_3: +define i8* @ldrhs32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.s32 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -227,15 +227,15 @@ define i8* @post_ldrhs32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_2(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_2: +define i8* @ldrhs32_2(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.s32 q0, [r0] ; CHECK-NEXT: adds r0, #2 @@ -244,15 +244,15 @@ define i8* @post_ldrhs32_2(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 2 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_254(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_254: +define i8* @ldrhs32_254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.s32 q0, [r0] ; CHECK-NEXT: adds r0, #254 @@ -261,15 +261,15 @@ define i8* @post_ldrhs32_254(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 254 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_256(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_256: +define i8* @ldrhs32_256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.s32 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #256 @@ -278,97 +278,97 @@ define i8* @post_ldrhs32_256(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 256 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_4: +define i8* @ldrhu16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r0] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrhu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_3: +define i8* @ldrhu16_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r0] ; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrhu16_2(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_2: +define i8* @ldrhu16_2(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r0] ; CHECK-NEXT: adds r0, #2 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 2 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrhu16_254(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_254: +define i8* @ldrhu16_254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r0] ; CHECK-NEXT: adds r0, #254 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 254 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrhu16_256(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_256: +define i8* @ldrhu16_256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #256 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 256 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrbu32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_4: +define i8* @ldrbu32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r0] ; CHECK-NEXT: adds r0, #4 @@ -377,15 +377,15 @@ define i8* @post_ldrbu32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_3: +define i8* @ldrbu32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -394,15 +394,15 @@ define i8* @post_ldrbu32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbu32_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_127: +define i8* @ldrbu32_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r0] ; CHECK-NEXT: adds r0, #127 @@ -411,15 +411,15 @@ define i8* @post_ldrbu32_127(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbu32_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_128: +define i8* @ldrbu32_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r0] ; CHECK-NEXT: adds r0, #128 @@ -428,16 +428,16 @@ define i8* @post_ldrbu32_128(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbs32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_4: +define i8* @ldrbs32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s32 q0, [r0] ; CHECK-NEXT: adds r0, #4 @@ -446,15 +446,15 @@ define i8* @post_ldrbs32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbs32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_3: +define i8* @ldrbs32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s32 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -463,15 +463,15 @@ define i8* @post_ldrbs32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbs32_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_127: +define i8* @ldrbs32_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s32 q0, [r0] ; CHECK-NEXT: adds r0, #127 @@ -480,15 +480,15 @@ define i8* @post_ldrbs32_127(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbs32_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_128: +define i8* @ldrbs32_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s32 q0, [r0] ; CHECK-NEXT: adds r0, #128 @@ -497,218 +497,218 @@ define i8* @post_ldrbs32_128(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbu16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_4: +define i8* @ldrbu16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r0] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_3: +define i8* @ldrbu16_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r0] ; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbu16_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_127: +define i8* @ldrbu16_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r0] ; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbu16_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_128: +define i8* @ldrbu16_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r0] ; CHECK-NEXT: adds r0, #128 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbs16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_4: +define i8* @ldrbs16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s16 q0, [r0] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbs16_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_3: +define i8* @ldrbs16_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s16 q0, [r0] ; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbs16_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_127: +define i8* @ldrbs16_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s16 q0, [r0] ; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbs16_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_128: +define i8* @ldrbs16_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s16 q0, [r0] ; CHECK-NEXT: adds r0, #128 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbu8_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_4: +define i8* @ldrbu8_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_ldrbu8_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_3: +define i8* @ldrbu8_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0] ; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_ldrbu8_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_127: +define i8* @ldrbu8_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0] ; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_ldrbu8_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_128: +define i8* @ldrbu8_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0] ; CHECK-NEXT: adds r0, #128 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_ldrwf32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwf32_4: +define i8* @ldrwf32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: adds r0, #4 @@ -717,25 +717,106 @@ define i8* @post_ldrwf32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <4 x float>* - %1 = load <4 x float>, <4 x float>* %0, align 8 + %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = bitcast i8* %y to <4 x float>* - store <4 x float> %1, <4 x float>* %2, align 8 + store <4 x float> %1, <4 x float>* %2, align 4 ret i8* %z } -define i8* @post_ldrwf16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwf16_4: +define i8* @ldrwf16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r0] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %x to <8 x half>* - %1 = load <8 x half>, <8 x half>* %0, align 8 + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrwi32_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 1 + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrhi16_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhi16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 1 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhi32_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.s32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 1 + %2 = bitcast i8* %y to <4 x i32>* + %3 = sext <4 x i16> %1 to <4 x i32> + store <4 x i32> %3, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrf32_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrf32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 1 + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrf16_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrf16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %1 = load <8 x half>, <8 x half>* %0, align 1 %2 = bitcast i8* %y to <8 x half>* - store <8 x half> %1, <8 x half>* %2, align 8 + store <8 x half> %1, <8 x half>* %2, align 2 ret i8* %z } @@ -743,8 +824,8 @@ entry: -define i8* @post_strw32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_4: +define i8* @strw32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0] @@ -753,14 +834,14 @@ define i8* @post_strw32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_3: +define i8* @strw32_3(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0] @@ -769,14 +850,14 @@ define i8* @post_strw32_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_m4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_m4: +define i8* @strw32_m4(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0] @@ -785,14 +866,14 @@ define i8* @post_strw32_m4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_508(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_508: +define i8* @strw32_508(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0] @@ -801,14 +882,14 @@ define i8* @post_strw32_508(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 508 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_512(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_512: +define i8* @strw32_512(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0] @@ -817,14 +898,14 @@ define i8* @post_strw32_512(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 512 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_m508(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_m508: +define i8* @strw32_m508(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_m508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0] @@ -833,14 +914,14 @@ define i8* @post_strw32_m508(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_m512(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_m512: +define i8* @strw32_m512(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_m512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0] @@ -849,15 +930,15 @@ define i8* @post_strw32_m512(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 -512 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strh32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_4: +define i8* @strh32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vstrh.32 q0, [r0] @@ -866,14 +947,14 @@ define i8* @post_strh32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %y to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh32_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_3: +define i8* @strh32_3(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vstrh.32 q0, [r0] @@ -882,14 +963,14 @@ define i8* @post_strh32_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %y to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh32_2(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_2: +define i8* @strh32_2(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vstrh.32 q0, [r0] @@ -898,14 +979,14 @@ define i8* @post_strh32_2(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %y to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh32_254(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_254: +define i8* @strh32_254(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vstrh.32 q0, [r0] @@ -914,14 +995,14 @@ define i8* @post_strh32_254(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 254 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %y to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh32_256(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_256: +define i8* @strh32_256(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vstrh.32 q0, [r0] @@ -930,96 +1011,96 @@ define i8* @post_strh32_256(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 256 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %y to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_4: +define i8* @strh16_4(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: adds r0, #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_3: +define i8* @strh16_3(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_2(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_2: +define i8* @strh16_2(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_254(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_254: +define i8* @strh16_254(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: adds r0, #254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_256(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_256: +define i8* @strh16_256(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 256 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strb32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_4: +define i8* @strb32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vstrb.32 q0, [r0] @@ -1028,14 +1109,14 @@ define i8* @post_strb32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %y to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb32_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_3: +define i8* @strb32_3(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vstrb.32 q0, [r0] @@ -1044,14 +1125,14 @@ define i8* @post_strb32_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %y to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb32_127(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_127: +define i8* @strb32_127(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vstrb.32 q0, [r0] @@ -1060,14 +1141,14 @@ define i8* @post_strb32_127(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 127 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %y to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb32_128(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_128: +define i8* @strb32_128(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vstrb.32 q0, [r0] @@ -1076,15 +1157,15 @@ define i8* @post_strb32_128(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 128 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %y to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb16_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_4: +define i8* @strb16_4(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vstrb.16 q0, [r0] @@ -1093,14 +1174,14 @@ define i8* @post_strb16_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %y to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb16_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_3: +define i8* @strb16_3(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vstrb.16 q0, [r0] @@ -1109,14 +1190,14 @@ define i8* @post_strb16_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %y to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb16_127(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_127: +define i8* @strb16_127(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vstrb.16 q0, [r0] @@ -1125,14 +1206,14 @@ define i8* @post_strb16_127(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 127 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %y to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb16_128(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_128: +define i8* @strb16_128(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vstrb.16 q0, [r0] @@ -1141,79 +1222,79 @@ define i8* @post_strb16_128(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 128 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %y to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb8_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_4: +define i8* @strb8_4(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: adds r0, #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb8_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_3: +define i8* @strb8_3(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb8_127(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_127: +define i8* @strb8_127(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb8_128(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_128: +define i8* @strb8_128(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 128 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strf32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strf32_4: +define i8* @strf32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0] @@ -1222,24 +1303,105 @@ define i8* @post_strf32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x float>* - %1 = load <4 x float>, <4 x float>* %0, align 8 + %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = bitcast i8* %y to <4 x float>* - store <4 x float> %1, <4 x float>* %2, align 8 + store <4 x float> %1, <4 x float>* %2, align 4 ret i8* %z } -define i8* @post_strf16_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strf16_4: +define i8* @strf16_4(i8* %y, i8* %x) { +; CHECK-LABEL: strf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: adds r0, #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x half>* - %1 = load <8 x half>, <8 x half>* %0, align 8 + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @strwi32_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strwi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 1 + ret i8* %z +} + +define i8* @strhi16_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strhi16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 1 + ret i8* %z +} + +define i8* @strhi32_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strhi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vstrh.32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i16>* + %3 = trunc <4 x i32> %1 to <4 x i16> + store <4 x i16> %3, <4 x i16>* %2, align 1 + ret i8* %z +} + +define i8* @strf32_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strf32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 1 + ret i8* %z +} + +define i8* @strf16_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strf16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %1 = load <8 x half>, <8 x half>* %0, align 2 %2 = bitcast i8* %y to <8 x half>* - store <8 x half> %1, <8 x half>* %2, align 8 + store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %z } diff --git a/test/CodeGen/Thumb2/mve-ldst-preinc.ll b/test/CodeGen/Thumb2/mve-ldst-preinc.ll index c0ff5cfc81c..1b042300576 100644 --- a/test/CodeGen/Thumb2/mve-ldst-preinc.ll +++ b/test/CodeGen/Thumb2/mve-ldst-preinc.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s -define i8* @post_ldrwu32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_4: +define i8* @ldrwu32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 @@ -11,14 +11,14 @@ define i8* @post_ldrwu32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_3: +define i8* @ldrwu32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r0] @@ -27,14 +27,14 @@ define i8* @post_ldrwu32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_m4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_m4: +define i8* @ldrwu32_m4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0, #-4] ; CHECK-NEXT: subs r0, #4 @@ -43,14 +43,14 @@ define i8* @post_ldrwu32_m4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 -4 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_508(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_508: +define i8* @ldrwu32_508(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r0, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r0] @@ -59,14 +59,14 @@ define i8* @post_ldrwu32_508(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 508 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_512(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_512: +define i8* @ldrwu32_512(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r0] @@ -75,14 +75,14 @@ define i8* @post_ldrwu32_512(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 512 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_m508(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_m508: +define i8* @ldrwu32_m508(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_m508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: sub.w r0, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r0] @@ -91,14 +91,14 @@ define i8* @post_ldrwu32_m508(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 -508 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrwu32_m512(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwu32_m512: +define i8* @ldrwu32_m512(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwu32_m512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: sub.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r0] @@ -107,15 +107,15 @@ define i8* @post_ldrwu32_m512(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 -512 %0 = bitcast i8* %z to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_ldrhu32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_4: +define i8* @ldrhu32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 @@ -124,15 +124,15 @@ define i8* @post_ldrhu32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_3: +define i8* @ldrhu32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrh.u32 q0, [r0] @@ -141,15 +141,15 @@ define i8* @post_ldrhu32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu32_2(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_2: +define i8* @ldrhu32_2(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vldrh.u32 q0, [r0] @@ -158,15 +158,15 @@ define i8* @post_ldrhu32_2(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 2 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu32_254(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_254: +define i8* @ldrhu32_254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #254 ; CHECK-NEXT: vldrh.u32 q0, [r0] @@ -175,15 +175,15 @@ define i8* @post_ldrhu32_254(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 254 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu32_256(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu32_256: +define i8* @ldrhu32_256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrh.u32 q0, [r0] @@ -192,16 +192,16 @@ define i8* @post_ldrhu32_256(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 256 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = zext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_4: +define i8* @ldrhs32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.s32 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 @@ -210,15 +210,15 @@ define i8* @post_ldrhs32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_3: +define i8* @ldrhs32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrh.s32 q0, [r0] @@ -227,15 +227,15 @@ define i8* @post_ldrhs32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_2(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_2: +define i8* @ldrhs32_2(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vldrh.s32 q0, [r0] @@ -244,15 +244,15 @@ define i8* @post_ldrhs32_2(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 2 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_254(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_254: +define i8* @ldrhs32_254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #254 ; CHECK-NEXT: vldrh.s32 q0, [r0] @@ -261,15 +261,15 @@ define i8* @post_ldrhs32_254(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 254 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhs32_256(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhs32_256: +define i8* @ldrhs32_256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrh.s32 q0, [r0] @@ -278,97 +278,97 @@ define i8* @post_ldrhs32_256(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 256 %0 = bitcast i8* %z to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = sext <4 x i16> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrhu16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_4: +define i8* @ldrhu16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] +; CHECK-NEXT: vldrh.u16 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrhu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_3: +define i8* @ldrhu16_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrhu16_2(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_2: +define i8* @ldrhu16_2(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_2: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r0, #2] ; CHECK-NEXT: adds r0, #2 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 2 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrhu16_254(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_254: +define i8* @ldrhu16_254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #254 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 254 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrhu16_256(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrhu16_256: +define i8* @ldrhu16_256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r0, r0, #256 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrh.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 256 %0 = bitcast i8* %z to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_ldrbu32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_4: +define i8* @ldrbu32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 @@ -377,15 +377,15 @@ define i8* @post_ldrbu32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_3: +define i8* @ldrbu32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrb.u32 q0, [r0] @@ -394,15 +394,15 @@ define i8* @post_ldrbu32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbu32_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_127: +define i8* @ldrbu32_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.u32 q0, [r0] @@ -411,15 +411,15 @@ define i8* @post_ldrbu32_127(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbu32_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu32_128: +define i8* @ldrbu32_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.u32 q0, [r0] @@ -428,16 +428,16 @@ define i8* @post_ldrbu32_128(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = zext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbs32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_4: +define i8* @ldrbs32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s32 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 @@ -446,15 +446,15 @@ define i8* @post_ldrbs32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbs32_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_3: +define i8* @ldrbs32_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrb.s32 q0, [r0] @@ -463,15 +463,15 @@ define i8* @post_ldrbs32_3(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbs32_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_127: +define i8* @ldrbs32_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.s32 q0, [r0] @@ -480,15 +480,15 @@ define i8* @post_ldrbs32_127(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbs32_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs32_128: +define i8* @ldrbs32_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.s32 q0, [r0] @@ -497,218 +497,218 @@ define i8* @post_ldrbs32_128(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = sext <4 x i8> %1 to <4 x i32> %3 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %2, <4 x i32>* %3, align 8 + store <4 x i32> %2, <4 x i32>* %3, align 4 ret i8* %z } -define i8* @post_ldrbu16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_4: +define i8* @ldrbu16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_3: +define i8* @ldrbu16_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrb.u16 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbu16_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_127: +define i8* @ldrbu16_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.u16 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbu16_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu16_128: +define i8* @ldrbu16_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.u16 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = zext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbs16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_4: +define i8* @ldrbs16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s16 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbs16_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_3: +define i8* @ldrbs16_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrb.s16 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbs16_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_127: +define i8* @ldrbs16_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.s16 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbs16_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbs16_128: +define i8* @ldrbs16_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.s16 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = sext <8 x i8> %1 to <8 x i16> %3 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 8 + store <8 x i16> %2, <8 x i16>* %3, align 2 ret i8* %z } -define i8* @post_ldrbu8_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_4: +define i8* @ldrbu8_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] +; CHECK-NEXT: vldrb.u8 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_ldrbu8_3(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_3: +define i8* @ldrbu8_3(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] ; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_ldrbu8_127(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_127: +define i8* @ldrbu8_127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrb.u8 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 127 %0 = bitcast i8* %z to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_ldrbu8_128(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrbu8_128: +define i8* @ldrbu8_128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #128] ; CHECK-NEXT: adds r0, #128 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vldrb.u8 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 128 %0 = bitcast i8* %z to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_ldrwf32_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwf32_4: +define i8* @ldrwf32_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 @@ -717,25 +717,106 @@ define i8* @post_ldrwf32_4(i8* %x, i8* %y) { entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <4 x float>* - %1 = load <4 x float>, <4 x float>* %0, align 8 + %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = bitcast i8* %y to <4 x float>* - store <4 x float> %1, <4 x float>* %2, align 8 + store <4 x float> %1, <4 x float>* %2, align 4 ret i8* %z } -define i8* @post_ldrwf16_4(i8* %x, i8* %y) { -; CHECK-LABEL: post_ldrwf16_4: +define i8* @ldrwf16_4(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] +; CHECK-NEXT: vldrh.u16 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 4 %0 = bitcast i8* %z to <8 x half>* - %1 = load <8 x half>, <8 x half>* %0, align 8 + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrwi32_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrwi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 1 + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrhi16_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhi16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 1 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhi32_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.s32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 1 + %2 = bitcast i8* %y to <4 x i32>* + %3 = sext <4 x i16> %1 to <4 x i32> + store <4 x i32> %3, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrf32_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrf32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 1 + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrf16_align1(i8* %x, i8* %y) { +; CHECK-LABEL: ldrf16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x half>* + %1 = load <8 x half>, <8 x half>* %0, align 1 %2 = bitcast i8* %y to <8 x half>* - store <8 x half> %1, <8 x half>* %2, align 8 + store <8 x half> %1, <8 x half>* %2, align 2 ret i8* %z } @@ -743,8 +824,8 @@ entry: -define i8* @post_strw32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_4: +define i8* @strw32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0, #4] @@ -753,14 +834,14 @@ define i8* @post_strw32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_3: +define i8* @strw32_3(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r1] @@ -769,14 +850,14 @@ define i8* @post_strw32_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_m4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_m4: +define i8* @strw32_m4(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0, #-4] @@ -785,14 +866,14 @@ define i8* @post_strw32_m4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_508(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_508: +define i8* @strw32_508(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r0, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r1] @@ -801,14 +882,14 @@ define i8* @post_strw32_508(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 508 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_512(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_512: +define i8* @strw32_512(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r1] @@ -817,14 +898,14 @@ define i8* @post_strw32_512(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 512 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_m508(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_m508: +define i8* @strw32_m508(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_m508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: sub.w r0, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r1] @@ -833,14 +914,14 @@ define i8* @post_strw32_m508(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strw32_m512(i8* %y, i8* %x) { -; CHECK-LABEL: post_strw32_m512: +define i8* @strw32_m512(i8* %y, i8* %x) { +; CHECK-LABEL: strw32_m512: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: sub.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r1] @@ -849,15 +930,15 @@ define i8* @post_strw32_m512(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 -512 %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 4 %2 = bitcast i8* %z to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 8 + store <4 x i32> %1, <4 x i32>* %2, align 4 ret i8* %z } -define i8* @post_strh32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_4: +define i8* @strh32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vstrh.32 q0, [r0, #4] @@ -866,14 +947,14 @@ define i8* @post_strh32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh32_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_3: +define i8* @strh32_3(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrh.u32 q0, [r1] @@ -882,14 +963,14 @@ define i8* @post_strh32_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh32_2(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_2: +define i8* @strh32_2(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vldrh.u32 q0, [r1] @@ -898,14 +979,14 @@ define i8* @post_strh32_2(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh32_254(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_254: +define i8* @strh32_254(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #254 ; CHECK-NEXT: vldrh.u32 q0, [r1] @@ -914,14 +995,14 @@ define i8* @post_strh32_254(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 254 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh32_256(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh32_256: +define i8* @strh32_256(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrh.u32 q0, [r1] @@ -930,96 +1011,96 @@ define i8* @post_strh32_256(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 256 %0 = bitcast i8* %x to <4 x i16>* - %1 = load <4 x i16>, <4 x i16>* %0, align 8 + %1 = load <4 x i16>, <4 x i16>* %0, align 2 %2 = bitcast i8* %z to <4 x i16>* - store <4 x i16> %1, <4 x i16>* %2, align 8 + store <4 x i16> %1, <4 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_4: +define i8* @strh16_4(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_3: +define i8* @strh16_3(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_2(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_2: +define i8* @strh16_2(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_2: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0, #2] ; CHECK-NEXT: adds r0, #2 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_254(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_254: +define i8* @strh16_254(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #254 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strh16_256(i8* %y, i8* %x) { -; CHECK-LABEL: post_strh16_256: +define i8* @strh16_256(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: add.w r0, r0, #256 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 256 %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %1 = load <8 x i16>, <8 x i16>* %0, align 2 %2 = bitcast i8* %z to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 8 + store <8 x i16> %1, <8 x i16>* %2, align 2 ret i8* %z } -define i8* @post_strb32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_4: +define i8* @strb32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vstrb.32 q0, [r0, #4] @@ -1028,14 +1109,14 @@ define i8* @post_strb32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %z to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb32_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_3: +define i8* @strb32_3(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrb.u32 q0, [r1] @@ -1044,14 +1125,14 @@ define i8* @post_strb32_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %z to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb32_127(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_127: +define i8* @strb32_127(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.u32 q0, [r1] @@ -1060,14 +1141,14 @@ define i8* @post_strb32_127(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 127 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %z to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb32_128(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb32_128: +define i8* @strb32_128(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.u32 q0, [r1] @@ -1076,15 +1157,15 @@ define i8* @post_strb32_128(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 128 %0 = bitcast i8* %x to <4 x i8>* - %1 = load <4 x i8>, <4 x i8>* %0, align 8 + %1 = load <4 x i8>, <4 x i8>* %0, align 1 %2 = bitcast i8* %z to <4 x i8>* - store <4 x i8> %1, <4 x i8>* %2, align 8 + store <4 x i8> %1, <4 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb16_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_4: +define i8* @strb16_4(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vstrb.16 q0, [r0, #4] @@ -1093,14 +1174,14 @@ define i8* @post_strb16_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %z to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb16_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_3: +define i8* @strb16_3(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrb.u16 q0, [r1] @@ -1109,14 +1190,14 @@ define i8* @post_strb16_3(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %z to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb16_127(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_127: +define i8* @strb16_127(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.u16 q0, [r1] @@ -1125,14 +1206,14 @@ define i8* @post_strb16_127(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 127 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %z to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb16_128(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb16_128: +define i8* @strb16_128(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_128: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.u16 q0, [r1] @@ -1141,79 +1222,79 @@ define i8* @post_strb16_128(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 128 %0 = bitcast i8* %x to <8 x i8>* - %1 = load <8 x i8>, <8 x i8>* %0, align 8 + %1 = load <8 x i8>, <8 x i8>* %0, align 1 %2 = bitcast i8* %z to <8 x i8>* - store <8 x i8> %1, <8 x i8>* %2, align 8 + store <8 x i8> %1, <8 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb8_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_4: +define i8* @strb8_4(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %z to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb8_3(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_3: +define i8* @strb8_3(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] ; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %z to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb8_127(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_127: +define i8* @strb8_127(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %z to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strb8_128(i8* %y, i8* %x) { -; CHECK-LABEL: post_strb8_128: +define i8* @strb8_128(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #128] ; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 128 %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 8 + %1 = load <16 x i8>, <16 x i8>* %0, align 1 %2 = bitcast i8* %z to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 8 + store <16 x i8> %1, <16 x i8>* %2, align 1 ret i8* %z } -define i8* @post_strf32_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strf32_4: +define i8* @strf32_4(i8* %y, i8* %x) { +; CHECK-LABEL: strf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vstrw.32 q0, [r0, #4] @@ -1222,24 +1303,105 @@ define i8* @post_strf32_4(i8* %y, i8* %x) { entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x float>* - %1 = load <4 x float>, <4 x float>* %0, align 8 + %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = bitcast i8* %z to <4 x float>* - store <4 x float> %1, <4 x float>* %2, align 8 + store <4 x float> %1, <4 x float>* %2, align 4 ret i8* %z } -define i8* @post_strf16_4(i8* %y, i8* %x) { -; CHECK-LABEL: post_strf16_4: +define i8* @strf16_4(i8* %y, i8* %x) { +; CHECK-LABEL: strf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0, #4] ; CHECK-NEXT: adds r0, #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x half>* - %1 = load <8 x half>, <8 x half>* %0, align 8 + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @strwi32_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strwi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 1 + ret i8* %z +} + +define i8* @strhi16_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strhi16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 1 + ret i8* %z +} + +define i8* @strhi32_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strhi32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vstrh.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i16>* + %3 = trunc <4 x i32> %1 to <4 x i16> + store <4 x i16> %3, <4 x i16>* %2, align 1 + ret i8* %z +} + +define i8* @strf32_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strf32_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 1 + ret i8* %z +} + +define i8* @strf16_align1(i8* %y, i8* %x) { +; CHECK-LABEL: strf16_align1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %1 = load <8 x half>, <8 x half>* %0, align 2 %2 = bitcast i8* %z to <8 x half>* - store <8 x half> %1, <8 x half>* %2, align 8 + store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %z } diff --git a/test/CodeGen/Thumb2/mve-ldst-regimm.ll b/test/CodeGen/Thumb2/mve-ldst-regimm.ll index e71310f3da7..e5cd21a43af 100644 --- a/test/CodeGen/Thumb2/mve-ldst-regimm.ll +++ b/test/CodeGen/Thumb2/mve-ldst-regimm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s %struct.s_int8_t = type { [16 x i8], [16 x i8] } %struct.s_int16_t = type { [8 x i16], [8 x i16] } diff --git a/test/CodeGen/Thumb2/mve-loadstore.ll b/test/CodeGen/Thumb2/mve-loadstore.ll index 9f148c4d841..f02ce15b55c 100644 --- a/test/CodeGen/Thumb2/mve-loadstore.ll +++ b/test/CodeGen/Thumb2/mve-loadstore.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4(<4 x i32>* %vp) { ; CHECK-LABEL: load_4xi32_a4: