From cfc091852bfc3c1fe9c2731e73f7ee9e832a2e60 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Mon, 4 Sep 2017 22:01:25 +0000
Subject: [PATCH] [x86] add tests for vector store merge opportunity; NFC

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312504 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/merge-store-constants.ll | 139 ++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 test/CodeGen/X86/merge-store-constants.ll

diff --git a/test/CodeGen/X86/merge-store-constants.ll b/test/CodeGen/X86/merge-store-constants.ll
new file mode 100644
index 00000000000..a06f43f7a11
--- /dev/null
+++ b/test/CodeGen/X86/merge-store-constants.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64
+; Four adjacent i32 constant stores (16 bytes). The checks below record four 4-byte movl on i686 and two 8-byte movq on x86-64 (each pair of constants folded into one 64-bit immediate); no vector store is formed.
+define void @big_nonzero_16_bytes(i32* nocapture %a) {
+; X32-LABEL: big_nonzero_16_bytes:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl $1, (%eax)
+; X32-NEXT: movl $2, 4(%eax)
+; X32-NEXT: movl $3, 8(%eax)
+; X32-NEXT: movl $4, 12(%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: big_nonzero_16_bytes:
+; X64: # BB#0:
+; X64-NEXT: movabsq $8589934593, %rax # imm = 0x200000001
+; X64-NEXT: movq %rax, (%rdi)
+; X64-NEXT: movabsq $17179869187, %rax # imm = 0x400000003
+; X64-NEXT: movq %rax, 8(%rdi)
+; X64-NEXT: retq
+  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1 ; byte offset 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2 ; byte offset 8
+  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 3 ; byte offset 12
+  ; The stores are issued in ascending address order and cover bytes 0-15 with no gaps.
+  store i32 1, i32* %a, align 4
+  store i32 2, i32* %arrayidx1, align 4
+  store i32 3, i32* %arrayidx2, align 4
+  store i32 4, i32* %arrayidx3, align 4
+  ret void
+}
+
+; Splats may be an opportunity to use a broadcast op.
+; Eight adjacent i32 stores of the same constant 42 (32 bytes). The checks below record eight 4-byte movl on i686 and four 8-byte movq of 0x2A0000002A on x86-64; no vector or broadcast store is formed.
+define void @big_nonzero_32_bytes_splat(i32* nocapture %a) {
+; X32-LABEL: big_nonzero_32_bytes_splat:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl $42, (%eax)
+; X32-NEXT: movl $42, 4(%eax)
+; X32-NEXT: movl $42, 8(%eax)
+; X32-NEXT: movl $42, 12(%eax)
+; X32-NEXT: movl $42, 16(%eax)
+; X32-NEXT: movl $42, 20(%eax)
+; X32-NEXT: movl $42, 24(%eax)
+; X32-NEXT: movl $42, 28(%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: big_nonzero_32_bytes_splat:
+; X64: # BB#0:
+; X64-NEXT: movabsq $180388626474, %rax # imm = 0x2A0000002A
+; X64-NEXT: movq %rax, (%rdi)
+; X64-NEXT: movq %rax, 8(%rdi)
+; X64-NEXT: movq %rax, 16(%rdi)
+; X64-NEXT: movq %rax, 24(%rdi)
+; X64-NEXT: retq
+  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1 ; byte offset 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2 ; byte offset 8
+  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 3 ; byte offset 12
+  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 4 ; byte offset 16
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 5 ; byte offset 20
+  %arrayidx6 = getelementptr inbounds i32, i32* %a, i64 6 ; byte offset 24
+  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 7 ; byte offset 28
+  ; The stores are issued in ascending address order and cover bytes 0-31 with no gaps.
+  store i32 42, i32* %a, align 4
+  store i32 42, i32* %arrayidx1, align 4
+  store i32 42, i32* %arrayidx2, align 4
+  store i32 42, i32* %arrayidx3, align 4
+  store i32 42, i32* %arrayidx4, align 4
+  store i32 42, i32* %arrayidx5, align 4
+  store i32 42, i32* %arrayidx6, align 4
+  store i32 42, i32* %arrayidx7, align 4
+  ret void
+}
+
+; Verify that we choose the best-sized store(s) for each chunk.
+; Seven i64 stores followed by one i32, one i16 and one i8 store tile bytes 0-62 (63 bytes). The checks below record scalar immediate stores only: on i686 each i64 is split into two movl (zero high half written first); on x86-64 each i64 is a single movq.
+define void @big_nonzero_63_bytes(i8* nocapture %a) {
+; X32-LABEL: big_nonzero_63_bytes:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl $0, 4(%eax)
+; X32-NEXT: movl $1, (%eax)
+; X32-NEXT: movl $0, 12(%eax)
+; X32-NEXT: movl $2, 8(%eax)
+; X32-NEXT: movl $0, 20(%eax)
+; X32-NEXT: movl $3, 16(%eax)
+; X32-NEXT: movl $0, 28(%eax)
+; X32-NEXT: movl $4, 24(%eax)
+; X32-NEXT: movl $0, 36(%eax)
+; X32-NEXT: movl $5, 32(%eax)
+; X32-NEXT: movl $0, 44(%eax)
+; X32-NEXT: movl $6, 40(%eax)
+; X32-NEXT: movl $0, 52(%eax)
+; X32-NEXT: movl $7, 48(%eax)
+; X32-NEXT: movl $8, 56(%eax)
+; X32-NEXT: movw $9, 60(%eax)
+; X32-NEXT: movb $10, 62(%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: big_nonzero_63_bytes:
+; X64: # BB#0:
+; X64-NEXT: movq $1, (%rdi)
+; X64-NEXT: movq $2, 8(%rdi)
+; X64-NEXT: movq $3, 16(%rdi)
+; X64-NEXT: movq $4, 24(%rdi)
+; X64-NEXT: movq $5, 32(%rdi)
+; X64-NEXT: movq $6, 40(%rdi)
+; X64-NEXT: movq $7, 48(%rdi)
+; X64-NEXT: movl $8, 56(%rdi)
+; X64-NEXT: movw $9, 60(%rdi)
+; X64-NEXT: movb $10, 62(%rdi)
+; X64-NEXT: retq
+  %a8 = bitcast i8* %a to i64* ; i64 view of the buffer, used for bytes 0-55
+  %arrayidx8 = getelementptr inbounds i64, i64* %a8, i64 1
+  %arrayidx16 = getelementptr inbounds i64, i64* %a8, i64 2
+  %arrayidx24 = getelementptr inbounds i64, i64* %a8, i64 3
+  %arrayidx32 = getelementptr inbounds i64, i64* %a8, i64 4
+  %arrayidx40 = getelementptr inbounds i64, i64* %a8, i64 5
+  %arrayidx48 = getelementptr inbounds i64, i64* %a8, i64 6
+  %a4 = bitcast i8* %a to i32*
+  %arrayidx56 = getelementptr inbounds i32, i32* %a4, i64 14 ; 14 * 4 = byte offset 56
+  %a2 = bitcast i8* %a to i16*
+  %arrayidx60 = getelementptr inbounds i16, i16* %a2, i64 30 ; 30 * 2 = byte offset 60
+  %arrayidx62 = getelementptr inbounds i8, i8* %a, i64 62 ; byte offset 62, the last byte written
+  ; Each %arrayidxN name carries the byte offset N of its store; widths step down 8, 4, 2, 1 at the tail.
+  store i64 1, i64* %a8
+  store i64 2, i64* %arrayidx8
+  store i64 3, i64* %arrayidx16
+  store i64 4, i64* %arrayidx24
+  store i64 5, i64* %arrayidx32
+  store i64 6, i64* %arrayidx40
+  store i64 7, i64* %arrayidx48
+  store i32 8, i32* %arrayidx56
+  store i16 9, i16* %arrayidx60
+  store i8 10, i8* %arrayidx62
+  ret void
+}
+
-- 
2.50.1