From: Simon Pilgrim Date: Sat, 12 Aug 2017 17:27:35 +0000 (+0000) Subject: [X86] Regenerate merge store tests. NFCI. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c05af7aa8cbfafb74219fb9829c5e7937b1d1cd5;p=llvm [X86] Regenerate merge store tests. NFCI. Gives us a much better idea of what is going on than just relying on a few checks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310780 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll index 69f5f4c7a05..5058f1f5ec9 100644 --- a/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -1,14 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=0 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWOFF %s %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 } %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 } -; CHECK-LABEL: merge_const_store: ; save 1,2,3 ... as one big integer. -; CHECK: movabsq $578437695752307201 -; CHECK: ret define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp { +; CHECK-LABEL: merge_const_store: +; CHECK: # BB#0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB0_3 +; CHECK-NEXT: # BB#1: # %.lr.ph.preheader +; CHECK-NEXT: movabsq $578437695752307201, %rax # imm = 0x807060504030201 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %.lr.ph +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, (%rsi) +; CHECK-NEXT: addq $8, %rsi +; CHECK-NEXT: decl %edi +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %._crit_edge +; CHECK-NEXT: retq %1 = icmp sgt i32 %count, 0 br i1 %1, label %.lr.ph, label %._crit_edge .lr.ph: @@ -39,10 +52,23 @@ define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwt } ; No vectors because we use noimplicitfloat -; CHECK-LABEL: merge_const_store_no_vec: -; CHECK-NOT: vmovups -; CHECK: ret define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{ +; CHECK-LABEL: merge_const_store_no_vec: +; CHECK: # BB#0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB1_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_1: # %.lr.ph +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq $0, (%rsi) +; CHECK-NEXT: movq $0, 8(%rsi) +; CHECK-NEXT: movq $0, 16(%rsi) +; CHECK-NEXT: movq $0, 24(%rsi) +; CHECK-NEXT: addq $32, %rsi +; CHECK-NEXT: decl %edi +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: .LBB1_2: # %._crit_edge +; CHECK-NEXT: retq %1 = icmp sgt i32 %count, 0 br i1 %1, label %.lr.ph, label %._crit_edge .lr.ph: @@ -73,10 +99,23 @@ define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimp } ; Move the constants using a single vector store. -; CHECK-LABEL: merge_const_store_vec: -; CHECK: vmovups -; CHECK: ret define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp { +; CHECK-LABEL: merge_const_store_vec: +; CHECK: # BB#0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB2_3 +; CHECK-NEXT: # BB#1: # %.lr.ph.preheader +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB2_2: # %.lr.ph +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vmovups %ymm0, (%rsi) +; CHECK-NEXT: addq $32, %rsi +; CHECK-NEXT: decl %edi +; CHECK-NEXT: jne .LBB2_2 +; CHECK-NEXT: .LBB2_3: # %._crit_edge +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq %1 = icmp sgt i32 %count, 0 br i1 %1, label %.lr.ph, label %._crit_edge .lr.ph: @@ -107,13 +146,23 @@ define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind } ; Move the first 4 constants as a single vector. Move the rest as scalars. -; CHECK-LABEL: merge_nonconst_store: -; CHECK: movl $67305985 -; CHECK: movb -; CHECK: movw -; CHECK: movb -; CHECK: ret define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp { +; CHECK-LABEL: merge_nonconst_store: +; CHECK: # BB#0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB3_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB3_1: # %.lr.ph +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl $67305985, (%rdx) # imm = 0x4030201 +; CHECK-NEXT: movb %sil, 4(%rdx) +; CHECK-NEXT: movw $1798, 5(%rdx) # imm = 0x706 +; CHECK-NEXT: movb $8, 7(%rdx) +; CHECK-NEXT: addq $8, %rdx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: jne .LBB3_1 +; CHECK-NEXT: .LBB3_2: # %._crit_edge +; CHECK-NEXT: retq %1 = icmp sgt i32 %count, 0 br i1 %1, label %.lr.ph, label %._crit_edge .lr.ph: @@ -143,15 +192,34 @@ define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) n ret void } - -; CHECK-LABEL: merge_loads_i16: -; load: -; BWON: movzwl -; BWOFF: movw -; store: -; CHECK: movw -; CHECK: ret define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { +; BWON-LABEL: merge_loads_i16: +; BWON: # BB#0: +; BWON-NEXT: testl %edi, %edi +; BWON-NEXT: jle .LBB4_2 +; BWON-NEXT: .p2align 4, 0x90 +; BWON-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; BWON-NEXT: movzwl (%rsi), %eax +; BWON-NEXT: movw %ax, (%rdx) +; BWON-NEXT: addq $8, %rdx +; BWON-NEXT: decl %edi +; BWON-NEXT: jne .LBB4_1 +; BWON-NEXT: .LBB4_2: # %._crit_edge +; BWON-NEXT: retq +; +; BWOFF-LABEL: merge_loads_i16: +; BWOFF: # BB#0: +; BWOFF-NEXT: testl %edi, %edi +; BWOFF-NEXT: jle .LBB4_2 +; BWOFF-NEXT: .p2align 4, 0x90 +; BWOFF-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; BWOFF-NEXT: movw (%rsi), %ax +; BWOFF-NEXT: movw %ax, (%rdx) +; BWOFF-NEXT: addq $8, %rdx +; BWOFF-NEXT: decl %edi +; BWOFF-NEXT: jne .LBB4_1 +; BWOFF-NEXT: .LBB4_2: # %._crit_edge +; BWOFF-NEXT: retq %1 = icmp sgt i32 %count, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -179,15 +247,40 @@ define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struc } ; The loads and the stores are interleaved. Can't merge them. -; CHECK-LABEL: no_merge_loads: -; BWON: movzbl -; BWOFF: movb -; CHECK: movb -; BWON: movzbl -; BWOFF: movb -; CHECK: movb -; CHECK: ret define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { +; BWON-LABEL: no_merge_loads: +; BWON: # BB#0: +; BWON-NEXT: testl %edi, %edi +; BWON-NEXT: jle .LBB5_2 +; BWON-NEXT: .p2align 4, 0x90 +; BWON-NEXT: .LBB5_1: # %a4 +; BWON-NEXT: # =>This Inner Loop Header: Depth=1 +; BWON-NEXT: movzbl (%rsi), %eax +; BWON-NEXT: movb %al, (%rdx) +; BWON-NEXT: movzbl 1(%rsi), %eax +; BWON-NEXT: movb %al, 1(%rdx) +; BWON-NEXT: addq $8, %rdx +; BWON-NEXT: decl %edi +; BWON-NEXT: jne .LBB5_1 +; BWON-NEXT: .LBB5_2: # %._crit_edge +; BWON-NEXT: retq +; +; BWOFF-LABEL: no_merge_loads: +; BWOFF: # BB#0: +; BWOFF-NEXT: testl %edi, %edi +; BWOFF-NEXT: jle .LBB5_2 +; BWOFF-NEXT: .p2align 4, 0x90 +; BWOFF-NEXT: .LBB5_1: # %a4 +; BWOFF-NEXT: # =>This Inner Loop Header: Depth=1 +; BWOFF-NEXT: movb (%rsi), %al +; BWOFF-NEXT: movb %al, (%rdx) +; BWOFF-NEXT: movb 1(%rsi), %al +; BWOFF-NEXT: movb %al, 1(%rdx) +; BWOFF-NEXT: addq $8, %rdx +; BWOFF-NEXT: decl %edi +; BWOFF-NEXT: jne .LBB5_1 +; BWOFF-NEXT: .LBB5_2: # %._crit_edge +; BWOFF-NEXT: retq %1 = icmp sgt i32 %count, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -214,14 +307,20 @@ a4: ; preds = %4, %.lr.ph ret void } - -; CHECK-LABEL: merge_loads_integer: -; load: -; CHECK: movq -; store: -; CHECK: movq -; CHECK: ret define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp { +; CHECK-LABEL: merge_loads_integer: +; CHECK: # BB#0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB6_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq (%rsi), %rax +; CHECK-NEXT: movq %rax, (%rdx) +; CHECK-NEXT: addq $32, %rdx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: jne .LBB6_1 +; CHECK-NEXT: .LBB6_2: # %._crit_edge +; CHECK-NEXT: retq %1 = icmp sgt i32 %count, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -248,14 +347,21 @@ define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %s ret void } - -; CHECK-LABEL: merge_loads_vector: -; load: -; CHECK: movups -; store: -; CHECK: movups -; CHECK: ret define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp { +; CHECK-LABEL: merge_loads_vector: +; CHECK: # BB#0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB7_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB7_1: # %block4 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vmovups (%rsi), %xmm0 +; CHECK-NEXT: vmovups %xmm0, (%rdx) +; CHECK-NEXT: addq $32, %rdx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: jne .LBB7_1 +; CHECK-NEXT: .LBB7_2: # %._crit_edge +; CHECK-NEXT: retq %a1 = icmp sgt i32 %count, 0 br i1 %a1, label %.lr.ph, label %._crit_edge @@ -290,14 +396,22 @@ block4: ; preds = %4, %.lr.ph ret void } -;; On x86, even unaligned copies can be merged to vector ops. -; CHECK-LABEL: merge_loads_no_align: -; load: -; CHECK: vmovups -; store: -; CHECK: vmovups -; CHECK: ret +; On x86, even unaligned copies can be merged to vector ops. define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp { +; CHECK-LABEL: merge_loads_no_align: +; CHECK: # BB#0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB8_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB8_1: # %block4 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vmovups (%rsi), %xmm0 +; CHECK-NEXT: vmovups %xmm0, (%rdx) +; CHECK-NEXT: addq $32, %rdx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: jne .LBB8_1 +; CHECK-NEXT: .LBB8_2: # %._crit_edge +; CHECK-NEXT: retq %a1 = icmp sgt i32 %count, 0 br i1 %a1, label %.lr.ph, label %._crit_edge @@ -334,11 +448,36 @@ block4: ; preds = %4, %.lr.ph ; Make sure that we merge the consecutive load/store sequence below and use a ; word (16 bit) instead of a byte copy. -; CHECK-LABEL: MergeLoadStoreBaseIndexOffset: -; BWON: movzwl (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]] -; BWOFF: movw (%{{.*}},%{{.*}}), %[[REG:[a-z]+]] -; CHECK: movw %[[REG]], (%{{.*}}) define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) { +; BWON-LABEL: MergeLoadStoreBaseIndexOffset: +; BWON: # BB#0: +; BWON-NEXT: movl %ecx, %r8d +; BWON-NEXT: xorl %ecx, %ecx +; BWON-NEXT: .p2align 4, 0x90 +; BWON-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; BWON-NEXT: movq (%rdi,%rcx,8), %rax +; BWON-NEXT: movzwl (%rdx,%rax), %eax +; BWON-NEXT: movw %ax, (%rsi,%rcx,2) +; BWON-NEXT: incq %rcx +; BWON-NEXT: cmpl %ecx, %r8d +; BWON-NEXT: jne .LBB9_1 +; BWON-NEXT: # BB#2: +; BWON-NEXT: retq +; +; BWOFF-LABEL: MergeLoadStoreBaseIndexOffset: +; BWOFF: # BB#0: +; BWOFF-NEXT: movl %ecx, %r8d +; BWOFF-NEXT: xorl %ecx, %ecx +; BWOFF-NEXT: .p2align 4, 0x90 +; BWOFF-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; BWOFF-NEXT: movq (%rdi,%rcx,8), %rax +; BWOFF-NEXT: movw (%rdx,%rax), %ax +; BWOFF-NEXT: movw %ax, (%rsi,%rcx,2) +; BWOFF-NEXT: incq %rcx +; BWOFF-NEXT: cmpl %ecx, %r8d +; BWOFF-NEXT: jne .LBB9_1 +; BWOFF-NEXT: # BB#2: +; BWOFF-NEXT: retq br label %1 ;