From ada258617b2cfdb6a4a34478cc86a8c68341c644 Mon Sep 17 00:00:00 2001 From: Amaury Sechet Date: Fri, 23 Aug 2019 13:30:45 +0000 Subject: [PATCH] [PowerPC] Automatically generate various tests. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369754 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/ppc64-align-long-double.ll | 78 +-- .../umulo-128-legalisation-lowering.ll | 314 +++++----- test/CodeGen/PowerPC/unal-vec-ldst.ll | 573 ++++++++++-------- 3 files changed, 508 insertions(+), 457 deletions(-) diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll index 8a5c5731865..c9a45a881f1 100644 --- a/test/CodeGen/PowerPC/ppc64-align-long-double.ll +++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O2 -fast-isel=false -mattr=-vsx < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O2 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-P9 %s @@ -12,49 +13,48 @@ target triple = "powerpc64-unknown-linux-gnu" %struct.S = type { double, ppc_fp128 } +; The additional stores are caused because we forward the value in the +; store->load->bitcast path to make a store and bitcast of the same +; value. Since the target does bitcast through memory and we no longer +; remember the address we need to do the store in a fresh local +; address. define ppc_fp128 @test(%struct.S* byval %x) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: std 5, -16(1) +; CHECK-NEXT: std 6, -8(1) +; CHECK-NEXT: lfd 1, -16(1) +; CHECK-NEXT: lfd 2, -8(1) +; CHECK-NEXT: std 6, 72(1) +; CHECK-NEXT: std 5, 64(1) +; CHECK-NEXT: std 3, 48(1) +; CHECK-NEXT: std 4, 56(1) +; CHECK-NEXT: blr +; +; CHECK-VSX-LABEL: test: +; CHECK-VSX: # %bb.0: # %entry +; CHECK-VSX-NEXT: std 3, 48(1) +; CHECK-VSX-NEXT: std 6, 72(1) +; CHECK-VSX-NEXT: std 5, 64(1) +; CHECK-VSX-NEXT: std 4, 56(1) +; CHECK-VSX-NEXT: std 5, -16(1) +; CHECK-VSX-NEXT: std 6, -8(1) +; CHECK-VSX-NEXT: lfd 1, -16(1) +; CHECK-VSX-NEXT: lfd 2, -8(1) +; CHECK-VSX-NEXT: blr +; +; CHECK-P9-LABEL: test: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: mtvsrd 1, 5 +; CHECK-P9-NEXT: mtvsrd 2, 6 +; CHECK-P9-NEXT: std 6, 72(1) +; CHECK-P9-NEXT: std 5, 64(1) +; CHECK-P9-NEXT: std 3, 48(1) +; CHECK-P9-NEXT: std 4, 56(1) +; CHECK-P9-NEXT: blr entry: %b = getelementptr inbounds %struct.S, %struct.S* %x, i32 0, i32 1 %0 = load ppc_fp128, ppc_fp128* %b, align 16 ret ppc_fp128 %0 } -; The additional stores are caused because we forward the value in the -; store->load->bitcast path to make a store and bitcast of the same -; value. Since the target does bitcast through memory and we no longer -; remember the address we need to do the store in a fresh local -; address. - -; CHECK-DAG: std 6, 72(1) -; CHECK-DAG: std 5, 64(1) -; CHECK-DAG: std 4, 56(1) -; CHECK-DAG: std 3, 48(1) - -; CHECK-DAG: std 5, -16(1) -; CHECK-DAG: std 6, -8(1) -; CHECK-DAG: lfd 1, -16(1) -; CHECK-DAG: lfd 2, -8(1) - -; FIXMECHECK: lfd 1, 64(1) -; FIXMECHECK: lfd 2, 72(1) - -; CHECK-VSX-DAG: std 6, 72(1) -; CHECK-VSX-DAG: std 5, 64(1) -; CHECK-VSX-DAG: std 4, 56(1) -; CHECK-VSX-DAG: std 3, 48(1) -; CHECK-VSX-DAG: std 5, -16(1) -; CHECK-VSX-DAG: std 6, -8(1) -; CHECK-VSX: lfd 1, -16(1) -; CHECK-VSX: lfd 2, -8(1) - -; FIXME-VSX: addi 4, 1, 48 -; FIXME-VSX: lxsdx 1, 4, 3 -; FIXME-VSX: li 3, 24 -; FIXME-VSX: lxsdx 2, 4, 3 - -; CHECK-P9-DAG: std 6, 72(1) -; CHECK-P9-DAG: std 5, 64(1) -; CHECK-P9-DAG: std 4, 56(1) -; CHECK-P9-DAG: std 3, 48(1) -; CHECK-P9-DAG: mtvsrd 1, 5 -; CHECK-P9-DAG: mtvsrd 2, 6 diff --git a/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll index 2ca96df78c5..b3c9012b6e0 100644 --- a/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll +++ b/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll @@ -1,164 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC64 ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC32 define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { - ; PPC64-LABEL muloti_test: -; PPC64: mulld 8, 5, 4 -; PPC64-NEXT: cmpdi 5, 3, 0 -; PPC64-NEXT: mulhdu. 9, 3, 6 -; PPC64-NEXT: mulld 3, 3, 6 -; PPC64-NEXT: mcrf 1, 0 -; PPC64-NEXT: add 3, 3, 8 -; PPC64-NEXT: cmpdi 5, 0 -; PPC64-NEXT: crnor 20, 2, 22 -; PPC64-NEXT: cmpldi 3, 0 -; PPC64-NEXT: mulhdu 8, 4, 6 -; PPC64-NEXT: add 3, 8, 3 -; PPC64-NEXT: cmpld 6, 3, 8 -; PPC64-NEXT: crandc 21, 24, 2 -; PPC64-NEXT: crorc 20, 20, 6 -; PPC64-NEXT: li 7, 1 -; PPC64-NEXT: mulhdu. 5, 5, 4 -; PPC64-NEXT: crorc 20, 20, 2 -; PPC64-NEXT: crnor 20, 20, 21 -; PPC64-NEXT: mulld 4, 4, 6 -; PPC64-NEXT: bc 12, 20, .LBB0_2 -; PPC64: ori 5, 7, 0 -; PPC64-NEXT: blr -; PPC64-NEXT: .LBB0_2: -; PPC64-NEXT: addi 5, 0, 0 -; PPC64-NEXT: blr +; PPC64-LABEL: muloti_test: +; PPC64: # %bb.0: # %start +; PPC64-NEXT: mulld 8, 5, 4 +; PPC64-NEXT: cmpdi 5, 3, 0 +; PPC64-NEXT: mulhdu. 9, 3, 6 +; PPC64-NEXT: mulld 3, 3, 6 +; PPC64-NEXT: mcrf 1, 0 +; PPC64-NEXT: add 3, 3, 8 +; PPC64-NEXT: cmpdi 5, 0 +; PPC64-NEXT: crnor 20, 2, 22 +; PPC64-NEXT: cmpldi 3, 0 +; PPC64-NEXT: mulhdu 8, 4, 6 +; PPC64-NEXT: add 3, 8, 3 +; PPC64-NEXT: cmpld 6, 3, 8 +; PPC64-NEXT: crandc 21, 24, 2 +; PPC64-NEXT: crorc 20, 20, 6 +; PPC64-NEXT: li 7, 1 +; PPC64-NEXT: mulhdu. 5, 5, 4 +; PPC64-NEXT: crorc 20, 20, 2 +; PPC64-NEXT: crnor 20, 20, 21 +; PPC64-NEXT: mulld 4, 4, 6 +; PPC64-NEXT: bc 12, 20, .LBB0_2 +; PPC64-NEXT: # %bb.1: # %start +; PPC64-NEXT: ori 5, 7, 0 +; PPC64-NEXT: blr +; PPC64-NEXT: .LBB0_2: # %start +; PPC64-NEXT: addi 5, 0, 0 +; PPC64-NEXT: blr ; +; PPC32-LABEL: muloti_test: +; PPC32: # %bb.0: # %start +; PPC32-NEXT: mflr 0 +; PPC32-NEXT: stw 0, 4(1) +; PPC32-NEXT: stwu 1, -80(1) +; PPC32-NEXT: .cfi_def_cfa_offset 80 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: .cfi_offset r20, -48 +; PPC32-NEXT: .cfi_offset r21, -44 +; PPC32-NEXT: .cfi_offset r22, -40 +; PPC32-NEXT: .cfi_offset r23, -36 +; PPC32-NEXT: .cfi_offset r24, -32 +; PPC32-NEXT: .cfi_offset r25, -28 +; PPC32-NEXT: .cfi_offset r26, -24 +; PPC32-NEXT: .cfi_offset r27, -20 +; PPC32-NEXT: .cfi_offset r28, -16 +; PPC32-NEXT: .cfi_offset r29, -12 +; PPC32-NEXT: .cfi_offset r30, -8 +; PPC32-NEXT: stw 26, 56(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill +; PPC32-NEXT: mfcr 12 +; PPC32-NEXT: mr 30, 8 +; PPC32-NEXT: mr 29, 7 +; PPC32-NEXT: mr 27, 4 +; PPC32-NEXT: mr 26, 3 +; PPC32-NEXT: li 3, 0 +; PPC32-NEXT: li 4, 0 +; PPC32-NEXT: li 7, 0 +; PPC32-NEXT: li 8, 0 +; PPC32-NEXT: stw 20, 32(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 21, 36(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 22, 40(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 23, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 25, 10 +; PPC32-NEXT: stw 12, 28(1) +; PPC32-NEXT: mr 28, 9 +; PPC32-NEXT: mr 23, 6 +; PPC32-NEXT: mr 24, 5 +; PPC32-NEXT: bl __multi3@PLT +; PPC32-NEXT: mr 7, 4 +; PPC32-NEXT: mullw 4, 24, 30 +; PPC32-NEXT: mullw 8, 29, 23 +; PPC32-NEXT: mullw 10, 28, 27 +; PPC32-NEXT: mullw 11, 26, 25 +; PPC32-NEXT: mulhwu 9, 30, 23 +; PPC32-NEXT: mulhwu 12, 27, 25 +; PPC32-NEXT: mullw 0, 30, 23 +; PPC32-NEXT: mullw 22, 27, 25 +; PPC32-NEXT: add 21, 8, 4 +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: addc 4, 22, 0 +; PPC32-NEXT: add 11, 9, 21 +; PPC32-NEXT: add 0, 12, 10 +; PPC32-NEXT: adde 8, 0, 11 +; PPC32-NEXT: addc 4, 7, 4 +; PPC32-NEXT: adde 8, 3, 8 +; PPC32-NEXT: xor 22, 4, 7 +; PPC32-NEXT: xor 20, 8, 3 +; PPC32-NEXT: or. 22, 22, 20 +; PPC32-NEXT: mcrf 1, 0 +; PPC32-NEXT: cmpwi 29, 0 +; PPC32-NEXT: cmpwi 5, 24, 0 +; PPC32-NEXT: cmpwi 6, 26, 0 +; PPC32-NEXT: cmpwi 7, 28, 0 +; PPC32-NEXT: crnor 8, 22, 2 +; PPC32-NEXT: mulhwu. 23, 29, 23 +; PPC32-NEXT: crnor 9, 30, 26 +; PPC32-NEXT: mcrf 5, 0 +; PPC32-NEXT: cmplwi 21, 0 +; PPC32-NEXT: cmplw 6, 11, 9 +; PPC32-NEXT: cmplwi 7, 10, 0 +; PPC32-NEXT: crandc 10, 24, 2 +; PPC32-NEXT: cmplw 3, 0, 12 +; PPC32-NEXT: mulhwu. 9, 24, 30 +; PPC32-NEXT: mcrf 6, 0 +; PPC32-NEXT: crandc 11, 12, 30 +; PPC32-NEXT: cmplw 4, 7 +; PPC32-NEXT: cmplw 7, 8, 3 +; PPC32-NEXT: crand 12, 30, 0 +; PPC32-NEXT: crandc 13, 28, 30 +; PPC32-NEXT: mulhwu. 3, 26, 25 +; PPC32-NEXT: mcrf 7, 0 +; PPC32-NEXT: cror 0, 12, 13 +; PPC32-NEXT: crandc 12, 0, 6 +; PPC32-NEXT: crorc 20, 8, 22 +; PPC32-NEXT: crorc 20, 20, 26 +; PPC32-NEXT: mulhwu. 3, 28, 27 +; PPC32-NEXT: mcrf 1, 0 +; PPC32-NEXT: crorc 25, 9, 30 +; PPC32-NEXT: or. 3, 27, 26 +; PPC32-NEXT: cror 24, 20, 10 +; PPC32-NEXT: mcrf 5, 0 +; PPC32-NEXT: crorc 25, 25, 6 +; PPC32-NEXT: or. 3, 30, 29 +; PPC32-NEXT: cror 25, 25, 11 +; PPC32-NEXT: crnor 20, 2, 22 +; PPC32-NEXT: lwz 12, 28(1) +; PPC32-NEXT: cror 20, 20, 25 +; PPC32-NEXT: cror 20, 20, 24 +; PPC32-NEXT: crnor 20, 20, 12 +; PPC32-NEXT: li 3, 1 +; PPC32-NEXT: bc 12, 20, .LBB0_2 +; PPC32-NEXT: # %bb.1: # %start +; PPC32-NEXT: ori 7, 3, 0 +; PPC32-NEXT: b .LBB0_3 +; PPC32-NEXT: .LBB0_2: # %start +; PPC32-NEXT: addi 7, 0, 0 +; PPC32-NEXT: .LBB0_3: # %start +; PPC32-NEXT: mr 3, 8 +; PPC32-NEXT: mtcrf 32, 12 # cr2 +; PPC32-NEXT: mtcrf 16, 12 # cr3 +; PPC32-NEXT: lwz 30, 72(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 29, 68(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 28, 64(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 27, 60(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 26, 56(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 25, 52(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 24, 48(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 23, 44(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 22, 40(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 21, 36(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 20, 32(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 0, 84(1) +; PPC32-NEXT: addi 1, 1, 80 +; PPC32-NEXT: mtlr 0 +; PPC32-NEXT: blr ; PPC32-LABEL muloti_test: -; PPC32: mflr 0 -; PPC32-NEXT: stw 0, 4(1) -; PPC32-NEXT: stwu 1, -80(1) -; PPC32-NEXT: .cfi_def_cfa_offset 80 -; PPC32-NEXT: .cfi_offset lr, 4 -; PPC32-NEXT: .cfi_offset r20, -48 -; PPC32-NEXT: .cfi_offset r21, -44 -; PPC32-NEXT: .cfi_offset r22, -40 -; PPC32-NEXT: .cfi_offset r23, -36 -; PPC32-NEXT: .cfi_offset r24, -32 -; PPC32-NEXT: .cfi_offset r25, -28 -; PPC32-NEXT: .cfi_offset r26, -24 -; PPC32-NEXT: .cfi_offset r27, -20 -; PPC32-NEXT: .cfi_offset r28, -16 -; PPC32-NEXT: .cfi_offset r29, -12 -; PPC32-NEXT: .cfi_offset r30, -8 -; PPC32-NEXT: stw 26, 56(1) -; PPC32-NEXT: stw 27, 60(1) -; PPC32-NEXT: stw 29, 68(1) -; PPC32-NEXT: stw 30, 72(1) -; PPC32-NEXT: mfcr 12 -; PPC32-NEXT: mr 30, 8 -; PPC32-NEXT: mr 29, 7 -; PPC32-NEXT: mr 27, 4 -; PPC32-NEXT: mr 26, 3 -; PPC32-NEXT: li 3, 0 -; PPC32-NEXT: li 4, 0 -; PPC32-NEXT: li 7, 0 -; PPC32-NEXT: li 8, 0 -; PPC32-NEXT: stw 20, 32(1) -; PPC32-NEXT: stw 21, 36(1) -; PPC32-NEXT: stw 22, 40(1) -; PPC32-NEXT: stw 23, 44(1) -; PPC32-NEXT: stw 24, 48(1) -; PPC32-NEXT: stw 25, 52(1) -; PPC32-NEXT: stw 28, 64(1) -; PPC32-NEXT: mr 25, 10 -; PPC32-NEXT: stw 12, 28(1) -; PPC32-NEXT: mr 28, 9 -; PPC32-NEXT: mr 23, 6 -; PPC32-NEXT: mr 24, 5 -; PPC32-NEXT: bl __multi3@PLT -; PPC32-NEXT: mr 7, 4 -; PPC32-NEXT: mullw 4, 24, 30 -; PPC32-NEXT: mullw 8, 29, 23 -; PPC32-NEXT: mullw 10, 28, 27 -; PPC32-NEXT: mullw 11, 26, 25 -; PPC32-NEXT: mulhwu 9, 30, 23 -; PPC32-NEXT: mulhwu 12, 27, 25 -; PPC32-NEXT: mullw 0, 30, 23 -; PPC32-NEXT: mullw 22, 27, 25 -; PPC32-NEXT: add 21, 8, 4 -; PPC32-NEXT: add 10, 11, 10 -; PPC32-NEXT: addc 4, 22, 0 -; PPC32-NEXT: add 11, 9, 21 -; PPC32-NEXT: add 0, 12, 10 -; PPC32-NEXT: adde 8, 0, 11 -; PPC32-NEXT: addc 4, 7, 4 -; PPC32-NEXT: adde 8, 3, 8 -; PPC32-NEXT: xor 22, 4, 7 -; PPC32-NEXT: xor 20, 8, 3 -; PPC32-NEXT: or. 22, 22, 20 -; PPC32-NEXT: mcrf 1, 0 -; PPC32-NEXT: cmpwi 29, 0 -; PPC32-NEXT: cmpwi 5, 24, 0 -; PPC32-NEXT: cmpwi 6, 26, 0 -; PPC32-NEXT: cmpwi 7, 28, 0 -; PPC32-NEXT: crnor 8, 22, 2 -; PPC32-NEXT: mulhwu. 23, 29, 23 -; PPC32-NEXT: crnor 9, 30, 26 -; PPC32-NEXT: mcrf 5, 0 -; PPC32-NEXT: cmplwi 21, 0 -; PPC32-NEXT: cmplw 6, 11, 9 -; PPC32-NEXT: cmplwi 7, 10, 0 -; PPC32-NEXT: crandc 10, 24, 2 -; PPC32-NEXT: cmplw 3, 0, 12 -; PPC32-NEXT: mulhwu. 9, 24, 30 -; PPC32-NEXT: mcrf 6, 0 -; PPC32-NEXT: crandc 11, 12, 30 -; PPC32-NEXT: cmplw 4, 7 -; PPC32-NEXT: cmplw 7, 8, 3 -; PPC32-NEXT: crand 12, 30, 0 -; PPC32-NEXT: crandc 13, 28, 30 -; PPC32-NEXT: mulhwu. 3, 26, 25 -; PPC32-NEXT: mcrf 7, 0 -; PPC32-NEXT: cror 0, 12, 13 -; PPC32-NEXT: crandc 12, 0, 6 -; PPC32-NEXT: crorc 20, 8, 22 -; PPC32-NEXT: crorc 20, 20, 26 -; PPC32-NEXT: mulhwu. 3, 28, 27 -; PPC32-NEXT: mcrf 1, 0 -; PPC32-NEXT: crorc 25, 9, 30 -; PPC32-NEXT: or. 3, 27, 26 -; PPC32-NEXT: cror 24, 20, 10 -; PPC32-NEXT: mcrf 5, 0 -; PPC32-NEXT: crorc 25, 25, 6 -; PPC32-NEXT: or. 3, 30, 29 -; PPC32-NEXT: cror 25, 25, 11 -; PPC32-NEXT: crnor 20, 2, 22 -; PPC32-NEXT: lwz 12, 28(1) -; PPC32-NEXT: cror 20, 20, 25 -; PPC32-NEXT: cror 20, 20, 24 -; PPC32-NEXT: crnor 20, 20, 12 -; PPC32-NEXT: li 3, 1 -; PPC32-NEXT: bc 12, 20, .LBB0_2 -; PPC32: ori 7, 3, 0 -; PPC32-NEXT: b .LBB0_3 -; PPC32-NEXT:.LBB0_2: -; PPC32-NEXT: addi 7, 0, 0 -; PPC32-NEXT:.LBB0_3: -; PPC32-NEXT: mr 3, 8 -; PPC32-NEXT: mtcrf 32, 12 -; PPC32-NEXT: mtcrf 16, 12 -; PPC32-NEXT: lwz 30, 72(1) -; PPC32-NEXT: lwz 29, 68(1) -; PPC32-NEXT: lwz 28, 64(1) -; PPC32-NEXT: lwz 27, 60(1) -; PPC32-NEXT: lwz 26, 56(1) -; PPC32-NEXT: lwz 25, 52(1) -; PPC32-NEXT: lwz 24, 48(1) -; PPC32-NEXT: lwz 23, 44(1) -; PPC32-NEXT: lwz 22, 40(1) -; PPC32-NEXT: lwz 21, 36(1) -; PPC32-NEXT: lwz 20, 32(1) -; PPC32-NEXT: lwz 0, 84(1) -; PPC32-NEXT: addi 1, 1, 80 -; PPC32-NEXT: mtlr 0 -; PPC32-NEXT: blr start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 %1 = extractvalue { i128, i1 } %0, 0 diff --git a/test/CodeGen/PowerPC/unal-vec-ldst.ll b/test/CodeGen/PowerPC/unal-vec-ldst.ll index f1bd2af4c40..497add38e04 100644 --- a/test/CodeGen/PowerPC/unal-vec-ldst.ll +++ b/test/CodeGen/PowerPC/unal-vec-ldst.ll @@ -1,577 +1,622 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs < %s | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 { +; CHECK-LABEL: test_l_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 15 +; CHECK-NEXT: lvsl 3, 0, 3 +; CHECK-NEXT: lvx 2, 3, 4 +; CHECK-NEXT: lvx 4, 0, 3 +; CHECK-NEXT: vperm 2, 4, 2, 3 +; CHECK-NEXT: blr entry: %r = load <16 x i8>, <16 x i8>* %p, align 1 ret <16 x i8> %r -; CHECK-LABEL: @test_l_v16i8 -; CHECK-DAG: li [[REG1:[0-9]+]], 15 -; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3 -; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3 -; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]] -; CHECK: blr } define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 { +; CHECK-LABEL: test_l_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 31 +; CHECK-NEXT: lvsl 5, 0, 3 +; CHECK-NEXT: lvx 2, 3, 4 +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lvx 4, 3, 4 +; CHECK-NEXT: lvx 0, 0, 3 +; CHECK-NEXT: vperm 3, 4, 2, 5 +; CHECK-NEXT: vperm 2, 0, 4, 5 +; CHECK-NEXT: blr entry: %r = load <32 x i8>, <32 x i8>* %p, align 1 ret <32 x i8> %r -; CHECK-LABEL: @test_l_v32i8 -; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 -; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3 -; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]] -; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3 -; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] -; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] -; CHECK: blr } define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 { +; CHECK-LABEL: test_l_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 15 +; CHECK-NEXT: lvsl 3, 0, 3 +; CHECK-NEXT: lvx 2, 3, 4 +; CHECK-NEXT: lvx 4, 0, 3 +; CHECK-NEXT: vperm 2, 4, 2, 3 +; CHECK-NEXT: blr entry: %r = load <8 x i16>, <8 x i16>* %p, align 2 ret <8 x i16> %r -; CHECK-LABEL: @test_l_v8i16 -; CHECK-DAG: li [[REG1:[0-9]+]], 15 -; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3 -; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3 -; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]] -; CHECK: blr } define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 { +; CHECK-LABEL: test_l_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 31 +; CHECK-NEXT: lvsl 5, 0, 3 +; CHECK-NEXT: lvx 2, 3, 4 +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lvx 4, 3, 4 +; CHECK-NEXT: lvx 0, 0, 3 +; CHECK-NEXT: vperm 3, 4, 2, 5 +; CHECK-NEXT: vperm 2, 0, 4, 5 +; CHECK-NEXT: blr entry: %r = load <16 x i16>, <16 x i16>* %p, align 2 ret <16 x i16> %r -; CHECK-LABEL: @test_l_v16i16 -; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 -; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3 -; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]] -; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3 -; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] -; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] -; CHECK: blr } define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 { +; CHECK-LABEL: test_l_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 15 +; CHECK-NEXT: lvsl 3, 0, 3 +; CHECK-NEXT: lvx 2, 3, 4 +; CHECK-NEXT: lvx 4, 0, 3 +; CHECK-NEXT: vperm 2, 4, 2, 3 +; CHECK-NEXT: blr entry: %r = load <4 x i32>, <4 x i32>* %p, align 4 ret <4 x i32> %r -; CHECK-LABEL: @test_l_v4i32 -; CHECK-DAG: li [[REG1:[0-9]+]], 15 -; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3 -; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3 -; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]] -; CHECK: blr } define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 { +; CHECK-LABEL: test_l_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 31 +; CHECK-NEXT: lvsl 5, 0, 3 +; CHECK-NEXT: lvx 2, 3, 4 +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lvx 4, 3, 4 +; CHECK-NEXT: lvx 0, 0, 3 +; CHECK-NEXT: vperm 3, 4, 2, 5 +; CHECK-NEXT: vperm 2, 0, 4, 5 +; CHECK-NEXT: blr entry: %r = load <8 x i32>, <8 x i32>* %p, align 4 ret <8 x i32> %r -; CHECK-LABEL: @test_l_v8i32 -; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 -; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3 -; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]] -; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3 -; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] -; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] -; CHECK: blr } define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 { +; CHECK-LABEL: test_l_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvd2x 34, 0, 3 +; CHECK-NEXT: blr entry: %r = load <2 x i64>, <2 x i64>* %p, align 8 ret <2 x i64> %r -; CHECK-LABEL: @test_l_v2i64 -; CHECK: lxvd2x 34, 0, 3 -; CHECK: blr } define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 { +; CHECK-LABEL: test_l_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lxvd2x 34, 0, 3 +; CHECK-NEXT: lxvd2x 35, 3, 4 +; CHECK-NEXT: blr entry: %r = load <4 x i64>, <4 x i64>* %p, align 8 ret <4 x i64> %r -; CHECK-LABEL: @test_l_v4i64 -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: lxvd2x 34, 0, 3 -; CHECK-DAG: lxvd2x 35, 3, [[REG1]] -; CHECK: blr } define <4 x float> @test_l_v4float(<4 x float>* %p) #0 { +; CHECK-LABEL: test_l_v4float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 15 +; CHECK-NEXT: lvsl 3, 0, 3 +; CHECK-NEXT: lvx 2, 3, 4 +; CHECK-NEXT: lvx 4, 0, 3 +; CHECK-NEXT: vperm 2, 4, 2, 3 +; CHECK-NEXT: blr entry: %r = load <4 x float>, <4 x float>* %p, align 4 ret <4 x float> %r -; CHECK-LABEL: @test_l_v4float -; CHECK-DAG: li [[REG1:[0-9]+]], 15 -; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3 -; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3 -; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]] -; CHECK: blr } define <8 x float> @test_l_v8float(<8 x float>* %p) #0 { +; CHECK-LABEL: test_l_v8float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 31 +; CHECK-NEXT: lvsl 5, 0, 3 +; CHECK-NEXT: lvx 2, 3, 4 +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lvx 4, 3, 4 +; CHECK-NEXT: lvx 0, 0, 3 +; CHECK-NEXT: vperm 3, 4, 2, 5 +; CHECK-NEXT: vperm 2, 0, 4, 5 +; CHECK-NEXT: blr entry: %r = load <8 x float>, <8 x float>* %p, align 4 ret <8 x float> %r -; CHECK-LABEL: @test_l_v8float -; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 -; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3 -; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]] -; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3 -; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] -; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] -; CHECK: blr } define <2 x double> @test_l_v2double(<2 x double>* %p) #0 { +; CHECK-LABEL: test_l_v2double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvd2x 34, 0, 3 +; CHECK-NEXT: blr entry: %r = load <2 x double>, <2 x double>* %p, align 8 ret <2 x double> %r -; CHECK-LABEL: @test_l_v2double -; CHECK: lxvd2x 34, 0, 3 -; CHECK: blr } define <4 x double> @test_l_v4double(<4 x double>* %p) #0 { +; CHECK-LABEL: test_l_v4double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lxvd2x 34, 0, 3 +; CHECK-NEXT: lxvd2x 35, 3, 4 +; CHECK-NEXT: blr entry: %r = load <4 x double>, <4 x double>* %p, align 8 ret <4 x double> %r -; CHECK-LABEL: @test_l_v4double -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: lxvd2x 34, 0, 3 -; CHECK-DAG: lxvd2x 35, 3, [[REG1]] -; CHECK: blr } define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 { +; CHECK-LABEL: test_l_p8v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvw4x 34, 0, 3 +; CHECK-NEXT: blr entry: %r = load <16 x i8>, <16 x i8>* %p, align 1 ret <16 x i8> %r -; CHECK-LABEL: @test_l_p8v16i8 -; CHECK: lxvw4x 34, 0, 3 -; CHECK: blr } define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 { +; CHECK-LABEL: test_l_p8v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lxvw4x 34, 0, 3 +; CHECK-NEXT: lxvw4x 35, 3, 4 +; CHECK-NEXT: blr entry: %r = load <32 x i8>, <32 x i8>* %p, align 1 ret <32 x i8> %r -; CHECK-LABEL: @test_l_p8v32i8 -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: lxvw4x 34, 0, 3 -; CHECK-DAG: lxvw4x 35, 3, [[REG1]] -; CHECK: blr } define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 { +; CHECK-LABEL: test_l_p8v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvw4x 34, 0, 3 +; CHECK-NEXT: blr entry: %r = load <8 x i16>, <8 x i16>* %p, align 2 ret <8 x i16> %r -; CHECK-LABEL: @test_l_p8v8i16 -; CHECK: lxvw4x 34, 0, 3 -; CHECK: blr } define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 { +; CHECK-LABEL: test_l_p8v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lxvw4x 34, 0, 3 +; CHECK-NEXT: lxvw4x 35, 3, 4 +; CHECK-NEXT: blr entry: %r = load <16 x i16>, <16 x i16>* %p, align 2 ret <16 x i16> %r -; CHECK-LABEL: @test_l_p8v16i16 -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: lxvw4x 34, 0, 3 -; CHECK-DAG: lxvw4x 35, 3, [[REG1]] -; CHECK: blr } define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 { +; CHECK-LABEL: test_l_p8v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvw4x 34, 0, 3 +; CHECK-NEXT: blr entry: %r = load <4 x i32>, <4 x i32>* %p, align 4 ret <4 x i32> %r -; CHECK-LABEL: @test_l_p8v4i32 -; CHECK: lxvw4x 34, 0, 3 -; CHECK: blr } define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 { +; CHECK-LABEL: test_l_p8v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lxvw4x 34, 0, 3 +; CHECK-NEXT: lxvw4x 35, 3, 4 +; CHECK-NEXT: blr entry: %r = load <8 x i32>, <8 x i32>* %p, align 4 ret <8 x i32> %r -; CHECK-LABEL: @test_l_p8v8i32 -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: lxvw4x 34, 0, 3 -; CHECK-DAG: lxvw4x 35, 3, [[REG1]] -; CHECK: blr } define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 { +; CHECK-LABEL: test_l_p8v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvd2x 34, 0, 3 +; CHECK-NEXT: blr entry: %r = load <2 x i64>, <2 x i64>* %p, align 8 ret <2 x i64> %r -; CHECK-LABEL: @test_l_p8v2i64 -; CHECK: lxvd2x 34, 0, 3 -; CHECK: blr } define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 { +; CHECK-LABEL: test_l_p8v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lxvd2x 34, 0, 3 +; CHECK-NEXT: lxvd2x 35, 3, 4 +; CHECK-NEXT: blr entry: %r = load <4 x i64>, <4 x i64>* %p, align 8 ret <4 x i64> %r -; CHECK-LABEL: @test_l_p8v4i64 -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: lxvd2x 34, 0, 3 -; CHECK-DAG: lxvd2x 35, 3, [[REG1]] -; CHECK: blr } define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 { +; CHECK-LABEL: test_l_p8v4float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvw4x 34, 0, 3 +; CHECK-NEXT: blr entry: %r = load <4 x float>, <4 x float>* %p, align 4 ret <4 x float> %r -; CHECK-LABEL: @test_l_p8v4float -; CHECK: lxvw4x 34, 0, 3 -; CHECK: blr } define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 { +; CHECK-LABEL: test_l_p8v8float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lxvw4x 34, 0, 3 +; CHECK-NEXT: lxvw4x 35, 3, 4 +; CHECK-NEXT: blr entry: %r = load <8 x float>, <8 x float>* %p, align 4 ret <8 x float> %r -; CHECK-LABEL: @test_l_p8v8float -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: lxvw4x 34, 0, 3 -; CHECK-DAG: lxvw4x 35, 3, [[REG1]] -; CHECK: blr } define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 { +; CHECK-LABEL: test_l_p8v2double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvd2x 34, 0, 3 +; CHECK-NEXT: blr entry: %r = load <2 x double>, <2 x double>* %p, align 8 ret <2 x double> %r -; CHECK-LABEL: @test_l_p8v2double -; CHECK: lxvd2x 34, 0, 3 -; CHECK: blr } define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 { +; CHECK-LABEL: test_l_p8v4double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: lxvd2x 34, 0, 3 +; CHECK-NEXT: lxvd2x 35, 3, 4 +; CHECK-NEXT: blr entry: %r = load <4 x double>, <4 x double>* %p, align 8 ret <4 x double> %r -; CHECK-LABEL: @test_l_p8v4double -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: lxvd2x 34, 0, 3 -; CHECK-DAG: lxvd2x 35, 3, [[REG1]] -; CHECK: blr } define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 { +; CHECK-LABEL: test_l_qv4float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 15 +; CHECK-NEXT: qvlpclsx 0, 0, 3 +; CHECK-NEXT: qvlfsx 1, 3, 4 +; CHECK-NEXT: qvlfsx 2, 0, 3 +; CHECK-NEXT: qvfperm 1, 2, 1, 0 +; CHECK-NEXT: blr entry: %r = load <4 x float>, <4 x float>* %p, align 4 ret <4 x float> %r -; CHECK-LABEL: @test_l_qv4float -; CHECK-DAG: li [[REG1:[0-9]+]], 15 -; CHECK-DAG: qvlpclsx 0, 0, 3 -; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: qvlfsx [[REG3:[0-9]+]], 0, 3 -; CHECK: qvfperm 1, [[REG3]], [[REG2]], 0 -; CHECK: blr } define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 { +; CHECK-LABEL: test_l_qv8float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 31 +; CHECK-NEXT: qvlpclsx 1, 0, 3 +; CHECK-NEXT: qvlfsx 0, 3, 4 +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: qvlfsx 3, 3, 4 +; CHECK-NEXT: qvlfsx 4, 0, 3 +; CHECK-NEXT: qvfperm 2, 3, 0, 1 +; CHECK-NEXT: qvfperm 1, 4, 3, 1 +; CHECK-NEXT: blr entry: %r = load <8 x float>, <8 x float>* %p, align 4 ret <8 x float> %r -; CHECK-LABEL: @test_l_qv8float -; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 -; CHECK-DAG: qvlfsx [[REG3:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 3, [[REG2]] -; CHECK-DAG: qvlpclsx [[REG5:[0-5]+]], 0, 3 -; CHECK-DAG: qvlfsx [[REG6:[0-9]+]], 0, 3 -; CHECK-DAG: qvfperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG5]] -; CHECK-DAG: qvfperm 1, {{[0-9]+}}, {{[0-9]+}}, [[REG5]] -; CHECK: blr } define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 { +; CHECK-LABEL: test_l_qv4double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 31 +; CHECK-NEXT: qvlpcldx 0, 0, 3 +; CHECK-NEXT: qvlfdx 1, 3, 4 +; CHECK-NEXT: qvlfdx 2, 0, 3 +; CHECK-NEXT: qvfperm 1, 2, 1, 0 +; CHECK-NEXT: blr entry: %r = load <4 x double>, <4 x double>* %p, align 8 ret <4 x double> %r -; CHECK-LABEL: @test_l_qv4double -; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: qvlpcldx 0, 0, 3 -; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: qvlfdx [[REG3:[0-9]+]], 0, 3 -; CHECK: qvfperm 1, [[REG3]], [[REG2]], 0 -; CHECK: blr } define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 { +; CHECK-LABEL: test_l_qv8double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 63 +; CHECK-NEXT: qvlpcldx 1, 0, 3 +; CHECK-NEXT: qvlfdx 0, 3, 4 +; CHECK-NEXT: li 4, 32 +; CHECK-NEXT: qvlfdx 3, 3, 4 +; CHECK-NEXT: qvlfdx 4, 0, 3 +; CHECK-NEXT: qvfperm 2, 3, 0, 1 +; CHECK-NEXT: qvfperm 1, 4, 3, 1 +; CHECK-NEXT: blr entry: %r = load <8 x double>, <8 x double>* %p, align 8 ret <8 x double> %r -; CHECK-LABEL: @test_l_qv8double -; CHECK-DAG: li [[REG1:[0-9]+]], 63 -; CHECK-DAG: li [[REG2:[0-9]+]], 32 -; CHECK-DAG: qvlfdx [[REG3:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 3, [[REG2]] -; CHECK-DAG: qvlpcldx [[REG5:[0-5]+]], 0, 3 -; CHECK-DAG: qvlfdx [[REG6:[0-9]+]], 0, 3 -; CHECK-DAG: qvfperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG5]] -; CHECK-DAG: qvfperm 1, {{[0-9]+}}, {{[0-9]+}}, [[REG5]] -; CHECK: blr } define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 { +; CHECK-LABEL: test_s_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxvw4x 34, 0, 3 +; CHECK-NEXT: blr entry: store <16 x i8> %v, <16 x i8>* %p, align 1 ret void -; CHECK-LABEL: @test_s_v16i8 -; CHECK: stxvw4x 34, 0, 3 -; CHECK: blr } define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 { +; CHECK-LABEL: test_s_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: stxvw4x 34, 0, 3 +; CHECK-NEXT: stxvw4x 35, 3, 4 +; CHECK-NEXT: blr entry: store <32 x i8> %v, <32 x i8>* %p, align 1 ret void -; CHECK-LABEL: @test_s_v32i8 -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: stxvw4x 35, 3, [[REG1]] -; CHECK-DAG: stxvw4x 34, 0, 3 -; CHECK: blr } define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 { +; CHECK-LABEL: test_s_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxvw4x 34, 0, 3 +; CHECK-NEXT: blr entry: store <8 x i16> %v, <8 x i16>* %p, align 2 ret void -; CHECK-LABEL: @test_s_v8i16 -; CHECK: stxvw4x 34, 0, 3 -; CHECK: blr } define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 { +; CHECK-LABEL: test_s_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: stxvw4x 34, 0, 3 +; CHECK-NEXT: stxvw4x 35, 3, 4 +; CHECK-NEXT: blr entry: store <16 x i16> %v, <16 x i16>* %p, align 2 ret void -; CHECK-LABEL: @test_s_v16i16 -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: stxvw4x 35, 3, [[REG1]] -; CHECK-DAG: stxvw4x 34, 0, 3 -; CHECK: blr } define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 { +; CHECK-LABEL: test_s_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxvw4x 34, 0, 3 +; CHECK-NEXT: blr entry: store <4 x i32> %v, <4 x i32>* %p, align 4 ret void -; CHECK-LABEL: @test_s_v4i32 -; CHECK: stxvw4x 34, 0, 3 -; CHECK: blr } define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 { +; CHECK-LABEL: test_s_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: stxvw4x 34, 0, 3 +; CHECK-NEXT: stxvw4x 35, 3, 4 +; CHECK-NEXT: blr entry: store <8 x i32> %v, <8 x i32>* %p, align 4 ret void -; CHECK-LABEL: @test_s_v8i32 -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: stxvw4x 35, 3, [[REG1]] -; CHECK-DAG: stxvw4x 34, 0, 3 -; CHECK: blr } define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 { +; CHECK-LABEL: test_s_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxvd2x 34, 0, 3 +; CHECK-NEXT: blr entry: store <2 x i64> %v, <2 x i64>* %p, align 8 ret void -; CHECK-LABEL: @test_s_v2i64 -; CHECK: stxvd2x 34, 0, 3 -; CHECK: blr } define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 { +; CHECK-LABEL: test_s_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: stxvd2x 34, 0, 3 +; CHECK-NEXT: stxvd2x 35, 3, 4 +; CHECK-NEXT: blr entry: store <4 x i64> %v, <4 x i64>* %p, align 8 ret void -; CHECK-LABEL: @test_s_v4i64 -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: stxvd2x 35, 3, [[REG1]] -; CHECK-DAG: stxvd2x 34, 0, 3 -; CHECK: blr } define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_s_v4float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxvw4x 34, 0, 3 +; CHECK-NEXT: blr entry: store <4 x float> %v, <4 x float>* %p, align 4 ret void -; CHECK-LABEL: @test_s_v4float -; CHECK: stxvw4x 34, 0, 3 -; CHECK: blr } define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 { +; CHECK-LABEL: test_s_v8float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: stxvw4x 34, 0, 3 +; CHECK-NEXT: stxvw4x 35, 3, 4 +; CHECK-NEXT: blr entry: store <8 x float> %v, <8 x float>* %p, align 4 ret void -; CHECK-LABEL: @test_s_v8float -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: stxvw4x 35, 3, [[REG1]] -; CHECK-DAG: stxvw4x 34, 0, 3 -; CHECK: blr } define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 { +; CHECK-LABEL: test_s_v2double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxvd2x 34, 0, 3 +; CHECK-NEXT: blr entry: store <2 x double> %v, <2 x double>* %p, align 8 ret void -; CHECK-LABEL: @test_s_v2double -; CHECK: stxvd2x 34, 0, 3 -; CHECK: blr } define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 { +; CHECK-LABEL: test_s_v4double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: stxvd2x 34, 0, 3 +; CHECK-NEXT: stxvd2x 35, 3, 4 +; CHECK-NEXT: blr entry: store <4 x double> %v, <4 x double>* %p, align 8 ret void -; CHECK-LABEL: @test_s_v4double -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK-DAG: stxvd2x 35, 3, [[REG1]] -; CHECK-DAG: stxvd2x 34, 0, 3 -; CHECK: blr } define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 { +; CHECK-LABEL: test_s_qv4float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: qvesplati 0, 1, 3 +; CHECK-NEXT: stfs 1, 0(3) +; CHECK-NEXT: stfs 0, 12(3) +; CHECK-NEXT: qvesplati 0, 1, 2 +; CHECK-NEXT: qvesplati 1, 1, 1 +; CHECK-NEXT: stfs 0, 8(3) +; CHECK-NEXT: stfs 1, 4(3) +; CHECK-NEXT: blr entry: store <4 x float> %v, <4 x float>* %p, align 4 ret void -; CHECK-LABEL: @test_s_qv4float -; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 1, 3 -; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 1, 2 -; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 1, 1 -; CHECK-DAG: stfs 1, 0(3) -; CHECK-DAG: stfs [[REG1]], 12(3) -; CHECK-DAG: stfs [[REG2]], 8(3) -; CHECK-DAG: stfs [[REG3]], 4(3) -; CHECK: blr } define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 { +; CHECK-LABEL: test_s_qv8float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: qvesplati 0, 2, 3 +; CHECK-NEXT: stfs 2, 16(3) +; CHECK-NEXT: stfs 0, 28(3) +; CHECK-NEXT: qvesplati 0, 2, 2 +; CHECK-NEXT: qvesplati 2, 2, 1 +; CHECK-NEXT: stfs 1, 0(3) +; CHECK-NEXT: stfs 0, 24(3) +; CHECK-NEXT: qvesplati 0, 1, 3 +; CHECK-NEXT: stfs 2, 20(3) +; CHECK-NEXT: qvesplati 2, 1, 2 +; CHECK-NEXT: qvesplati 1, 1, 1 +; CHECK-NEXT: stfs 0, 12(3) +; CHECK-NEXT: stfs 2, 8(3) +; CHECK-NEXT: stfs 1, 4(3) +; CHECK-NEXT: blr entry: store <8 x float> %v, <8 x float>* %p, align 4 ret void -; CHECK-LABEL: @test_s_qv8float -; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 2, 3 -; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 2, 2 -; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 2, 1 -; CHECK-DAG: qvesplati [[REG4:[0-9]+]], 1, 3 -; CHECK-DAG: qvesplati [[REG5:[0-9]+]], 1, 2 -; CHECK-DAG: qvesplati [[REG6:[0-9]+]], 1, 1 -; CHECK-DAG: stfs 2, 16(3) -; CHECK-DAG: stfs 1, 0(3) -; CHECK-DAG: stfs [[REG1]], 28(3) -; CHECK-DAG: stfs [[REG2]], 24(3) -; CHECK-DAG: stfs [[REG3]], 20(3) -; CHECK-DAG: stfs [[REG4]], 12(3) -; CHECK-DAG: stfs [[REG5]], 8(3) -; CHECK-DAG: stfs [[REG6]], 4(3) -; CHECK: blr } define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 { +; CHECK-LABEL: test_s_qv4double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: qvesplati 0, 1, 3 +; CHECK-NEXT: stfd 1, 0(3) +; CHECK-NEXT: stfd 0, 24(3) +; CHECK-NEXT: qvesplati 0, 1, 2 +; CHECK-NEXT: qvesplati 1, 1, 1 +; CHECK-NEXT: stfd 0, 16(3) +; CHECK-NEXT: stfd 1, 8(3) +; CHECK-NEXT: blr entry: store <4 x double> %v, <4 x double>* %p, align 8 ret void -; CHECK-LABEL: @test_s_qv4double -; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 1, 3 -; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 1, 2 -; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 1, 1 -; CHECK-DAG: stfd 1, 0(3) -; CHECK-DAG: stfd [[REG1]], 24(3) -; CHECK-DAG: stfd [[REG2]], 16(3) -; CHECK-DAG: stfd [[REG3]], 8(3) -; CHECK: blr } define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 { +; CHECK-LABEL: test_s_qv8double: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: qvesplati 0, 2, 3 +; CHECK-NEXT: stfd 2, 32(3) +; CHECK-NEXT: stfd 0, 56(3) +; CHECK-NEXT: qvesplati 0, 2, 2 +; CHECK-NEXT: qvesplati 2, 2, 1 +; CHECK-NEXT: stfd 1, 0(3) +; CHECK-NEXT: stfd 0, 48(3) +; CHECK-NEXT: qvesplati 0, 1, 3 +; CHECK-NEXT: stfd 2, 40(3) +; CHECK-NEXT: qvesplati 2, 1, 2 +; CHECK-NEXT: qvesplati 1, 1, 1 +; CHECK-NEXT: stfd 0, 24(3) +; CHECK-NEXT: stfd 2, 16(3) +; CHECK-NEXT: stfd 1, 8(3) +; CHECK-NEXT: blr entry: store <8 x double> %v, <8 x double>* %p, align 8 ret void -; CHECK-LABEL: @test_s_qv8double -; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 2, 3 -; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 2, 2 -; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 2, 1 -; CHECK-DAG: qvesplati [[REG4:[0-9]+]], 1, 3 -; CHECK-DAG: qvesplati [[REG5:[0-9]+]], 1, 2 -; CHECK-DAG: qvesplati [[REG6:[0-9]+]], 1, 1 -; CHECK-DAG: stfd 2, 32(3) -; CHECK-DAG: stfd 1, 0(3) -; CHECK-DAG: stfd [[REG1]], 56(3) -; CHECK-DAG: stfd [[REG2]], 48(3) -; CHECK-DAG: stfd [[REG3]], 40(3) -; CHECK-DAG: stfd [[REG4]], 24(3) -; CHECK-DAG: stfd [[REG5]], 16(3) -; CHECK-DAG: stfd [[REG6]], 8(3) -; CHECK: blr } attributes #0 = { nounwind "target-cpu"="pwr7" } -- 2.40.0