From: Amaury Sechet Date: Sat, 11 Feb 2017 19:27:15 +0000 (+0000) Subject: Regen expected tests result. NFC X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=021167a86e65c467edc9a2553eaae52adfce679b;p=llvm Regen expected tests result. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294866 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll index d5d3fa6db5e..b0ae8ee69d0 100644 --- a/test/CodeGen/X86/atomic-minmax-i6432.ll +++ b/test/CodeGen/X86/atomic-minmax-i6432.ll @@ -1,42 +1,207 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=x86 -mattr=+cmov,cx16 -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX ; RUN: llc -march=x86 -mattr=cx16 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC @sc64 = external global i64 define void @atomic_maxmin_i6432() { -; LINUX: atomic_maxmin_i6432 +; LINUX-LABEL: atomic_maxmin_i6432: +; LINUX: # BB#0: +; LINUX-NEXT: pushl %ebx +; LINUX-NEXT: .Lcfi0: +; LINUX-NEXT: .cfi_def_cfa_offset 8 +; LINUX-NEXT: pushl %esi +; LINUX-NEXT: .Lcfi1: +; LINUX-NEXT: .cfi_def_cfa_offset 12 +; LINUX-NEXT: .Lcfi2: +; LINUX-NEXT: .cfi_offset %esi, -12 +; LINUX-NEXT: .Lcfi3: +; LINUX-NEXT: .cfi_offset %ebx, -8 +; LINUX-NEXT: movl sc64+4, %edx +; LINUX-NEXT: movl sc64, %eax +; LINUX-NEXT: movl $4, %esi +; LINUX-NEXT: .p2align 4, 0x90 +; LINUX-NEXT: .LBB0_1: # %atomicrmw.start +; LINUX-NEXT: # =>This Inner Loop Header: Depth=1 +; LINUX-NEXT: cmpl %eax, %esi +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: sbbl %edx, %ecx +; LINUX-NEXT: setl %cl +; LINUX-NEXT: andb $1, %cl +; LINUX-NEXT: movl $5, %ebx +; LINUX-NEXT: cmovnel %eax, %ebx +; LINUX-NEXT: testb %cl, %cl +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovnel %edx, %ecx +; LINUX-NEXT: lock cmpxchg8b sc64 +; LINUX-NEXT: jne .LBB0_1 +; LINUX-NEXT: # BB#2: # %atomicrmw.end +; LINUX-NEXT: movl sc64+4, %edx +; LINUX-NEXT: movl sc64, %eax +; LINUX-NEXT: .p2align 4, 0x90 +; LINUX-NEXT: .LBB0_3: # %atomicrmw.start2 +; LINUX-NEXT: # =>This Inner Loop Header: Depth=1 +; LINUX-NEXT: cmpl $7, %eax +; LINUX-NEXT: movl %edx, %ecx +; LINUX-NEXT: sbbl $0, %ecx +; LINUX-NEXT: setl %cl +; LINUX-NEXT: andb $1, %cl +; LINUX-NEXT: movl $6, %ebx +; LINUX-NEXT: cmovnel %eax, %ebx +; LINUX-NEXT: testb %cl, %cl +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovnel %edx, %ecx +; LINUX-NEXT: lock cmpxchg8b sc64 +; LINUX-NEXT: jne .LBB0_3 +; LINUX-NEXT: # BB#4: # %atomicrmw.end1 +; LINUX-NEXT: movl sc64+4, %edx +; LINUX-NEXT: movl sc64, %eax +; LINUX-NEXT: movl $7, %esi +; LINUX-NEXT: .p2align 4, 0x90 +; LINUX-NEXT: .LBB0_5: # %atomicrmw.start8 +; LINUX-NEXT: # =>This Inner Loop Header: Depth=1 +; LINUX-NEXT: cmpl %eax, %esi +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: sbbl %edx, %ecx +; LINUX-NEXT: setb %cl +; LINUX-NEXT: andb $1, %cl +; LINUX-NEXT: movl $7, %ebx +; LINUX-NEXT: cmovnel %eax, %ebx +; LINUX-NEXT: testb %cl, %cl +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovnel %edx, %ecx +; LINUX-NEXT: lock cmpxchg8b sc64 +; LINUX-NEXT: jne .LBB0_5 +; LINUX-NEXT: # BB#6: # %atomicrmw.end7 +; LINUX-NEXT: movl sc64+4, %edx +; LINUX-NEXT: movl sc64, %eax +; LINUX-NEXT: .p2align 4, 0x90 +; LINUX-NEXT: .LBB0_7: # %atomicrmw.start14 +; LINUX-NEXT: # =>This Inner Loop Header: Depth=1 +; LINUX-NEXT: cmpl $9, %eax +; LINUX-NEXT: movl %edx, %ecx +; LINUX-NEXT: sbbl $0, %ecx +; LINUX-NEXT: setb %cl +; LINUX-NEXT: andb $1, %cl 
+; LINUX-NEXT: movl $8, %ebx +; LINUX-NEXT: cmovnel %eax, %ebx +; LINUX-NEXT: testb %cl, %cl +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovnel %edx, %ecx +; LINUX-NEXT: lock cmpxchg8b sc64 +; LINUX-NEXT: jne .LBB0_7 +; LINUX-NEXT: # BB#8: # %atomicrmw.end13 +; LINUX-NEXT: popl %esi +; LINUX-NEXT: popl %ebx +; LINUX-NEXT: retl +; +; PIC-LABEL: atomic_maxmin_i6432: +; PIC: ## BB#0: +; PIC-NEXT: pushl %ebx +; PIC-NEXT: Lcfi0: +; PIC-NEXT: .cfi_def_cfa_offset 8 +; PIC-NEXT: pushl %edi +; PIC-NEXT: Lcfi1: +; PIC-NEXT: .cfi_def_cfa_offset 12 +; PIC-NEXT: pushl %esi +; PIC-NEXT: Lcfi2: +; PIC-NEXT: .cfi_def_cfa_offset 16 +; PIC-NEXT: Lcfi3: +; PIC-NEXT: .cfi_offset %esi, -16 +; PIC-NEXT: Lcfi4: +; PIC-NEXT: .cfi_offset %edi, -12 +; PIC-NEXT: Lcfi5: +; PIC-NEXT: .cfi_offset %ebx, -8 +; PIC-NEXT: calll L0$pb +; PIC-NEXT: Lcfi6: +; PIC-NEXT: .cfi_adjust_cfa_offset 4 +; PIC-NEXT: L0$pb: +; PIC-NEXT: popl %eax +; PIC-NEXT: Lcfi7: +; PIC-NEXT: .cfi_adjust_cfa_offset -4 +; PIC-NEXT: movl L_sc64$non_lazy_ptr-L0$pb(%eax), %esi +; PIC-NEXT: movl (%esi), %eax +; PIC-NEXT: movl 4(%esi), %edx +; PIC-NEXT: movl $4, %edi +; PIC-NEXT: .p2align 4, 0x90 +; PIC-NEXT: LBB0_1: ## %atomicrmw.start +; PIC-NEXT: ## =>This Inner Loop Header: Depth=1 +; PIC-NEXT: cmpl %eax, %edi +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: sbbl %edx, %ecx +; PIC-NEXT: setl %cl +; PIC-NEXT: andb $1, %cl +; PIC-NEXT: movl $5, %ebx +; PIC-NEXT: cmovnel %eax, %ebx +; PIC-NEXT: testb %cl, %cl +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovnel %edx, %ecx +; PIC-NEXT: lock cmpxchg8b (%esi) +; PIC-NEXT: jne LBB0_1 +; PIC-NEXT: ## BB#2: ## %atomicrmw.end +; PIC-NEXT: movl (%esi), %eax +; PIC-NEXT: movl 4(%esi), %edx +; PIC-NEXT: .p2align 4, 0x90 +; PIC-NEXT: LBB0_3: ## %atomicrmw.start2 +; PIC-NEXT: ## =>This Inner Loop Header: Depth=1 +; PIC-NEXT: cmpl $7, %eax +; PIC-NEXT: movl %edx, %ecx +; PIC-NEXT: sbbl $0, %ecx +; PIC-NEXT: setl %cl +; PIC-NEXT: andb $1, %cl +; PIC-NEXT: movl $6, %ebx +; PIC-NEXT: cmovnel %eax, %ebx +; PIC-NEXT: testb %cl, %cl +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovnel %edx, %ecx +; PIC-NEXT: lock cmpxchg8b (%esi) +; PIC-NEXT: jne LBB0_3 +; PIC-NEXT: ## BB#4: ## %atomicrmw.end1 +; PIC-NEXT: movl (%esi), %eax +; PIC-NEXT: movl 4(%esi), %edx +; PIC-NEXT: movl $7, %edi +; PIC-NEXT: .p2align 4, 0x90 +; PIC-NEXT: LBB0_5: ## %atomicrmw.start8 +; PIC-NEXT: ## =>This Inner Loop Header: Depth=1 +; PIC-NEXT: cmpl %eax, %edi +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: sbbl %edx, %ecx +; PIC-NEXT: setb %cl +; PIC-NEXT: andb $1, %cl +; PIC-NEXT: movl $7, %ebx +; PIC-NEXT: cmovnel %eax, %ebx +; PIC-NEXT: testb %cl, %cl +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovnel %edx, %ecx +; PIC-NEXT: lock cmpxchg8b (%esi) +; PIC-NEXT: jne LBB0_5 +; PIC-NEXT: ## BB#6: ## %atomicrmw.end7 +; PIC-NEXT: movl (%esi), %eax +; PIC-NEXT: movl 4(%esi), %edx +; PIC-NEXT: .p2align 4, 0x90 +; PIC-NEXT: LBB0_7: ## %atomicrmw.start14 +; PIC-NEXT: ## =>This Inner Loop Header: Depth=1 +; PIC-NEXT: cmpl $9, %eax +; PIC-NEXT: movl %edx, %ecx +; PIC-NEXT: sbbl $0, %ecx +; PIC-NEXT: setb %cl +; PIC-NEXT: andb $1, %cl +; PIC-NEXT: movl $8, %ebx +; PIC-NEXT: cmovnel %eax, %ebx +; PIC-NEXT: testb %cl, %cl +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovnel %edx, %ecx +; PIC-NEXT: lock cmpxchg8b (%esi) +; PIC-NEXT: jne LBB0_7 +; PIC-NEXT: ## BB#8: ## %atomicrmw.end13 +; PIC-NEXT: popl %esi +; PIC-NEXT: popl %edi +; PIC-NEXT: popl %ebx +; PIC-NEXT: retl %1 = atomicrmw max i64* @sc64, i64 5 acquire -; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] -; LINUX: cmpl -; LINUX: sbbl -; LINUX: cmovne -; 
LINUX: cmovne -; LINUX: lock cmpxchg8b -; LINUX: jne [[LABEL]] %2 = atomicrmw min i64* @sc64, i64 6 acquire -; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] -; LINUX: cmpl -; LINUX: sbbl -; LINUX: cmovne -; LINUX: cmovne -; LINUX: lock cmpxchg8b -; LINUX: jne [[LABEL]] %3 = atomicrmw umax i64* @sc64, i64 7 acquire -; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] -; LINUX: cmpl -; LINUX: sbbl -; LINUX: cmovne -; LINUX: cmovne -; LINUX: lock cmpxchg8b -; LINUX: jne [[LABEL]] %4 = atomicrmw umin i64* @sc64, i64 8 acquire -; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] -; LINUX: cmpl -; LINUX: sbbl -; LINUX: cmovne -; LINUX: cmovne -; LINUX: lock cmpxchg8b -; LINUX: jne [[LABEL]] ret void } @@ -44,9 +209,62 @@ define void @atomic_maxmin_i6432() { @id = internal global i64 0, align 8 define void @tf_bug(i8* %ptr) nounwind { +; LINUX-LABEL: tf_bug: +; LINUX: # BB#0: +; LINUX-NEXT: pushl %ebx +; LINUX-NEXT: pushl %esi +; LINUX-NEXT: movl {{[0-9]+}}(%esp), %esi +; LINUX-NEXT: movl id+4, %edx +; LINUX-NEXT: movl id, %eax +; LINUX-NEXT: .p2align 4, 0x90 +; LINUX-NEXT: .LBB1_1: # %atomicrmw.start +; LINUX-NEXT: # =>This Inner Loop Header: Depth=1 +; LINUX-NEXT: movl %eax, %ebx +; LINUX-NEXT: addl $1, %ebx +; LINUX-NEXT: movl %edx, %ecx +; LINUX-NEXT: adcl $0, %ecx +; LINUX-NEXT: lock cmpxchg8b id +; LINUX-NEXT: jne .LBB1_1 +; LINUX-NEXT: # BB#2: # %atomicrmw.end +; LINUX-NEXT: addl $1, %eax +; LINUX-NEXT: adcl $0, %edx +; LINUX-NEXT: movl %eax, (%esi) +; LINUX-NEXT: movl %edx, 4(%esi) +; LINUX-NEXT: popl %esi +; LINUX-NEXT: popl %ebx +; LINUX-NEXT: retl +; ; PIC-LABEL: tf_bug: -; PIC-DAG: movl _id-L1$pb( -; PIC-DAG: movl (_id-L1$pb)+4( +; PIC: ## BB#0: +; PIC-NEXT: pushl %ebx +; PIC-NEXT: pushl %edi +; PIC-NEXT: pushl %esi +; PIC-NEXT: calll L1$pb +; PIC-NEXT: L1$pb: +; PIC-NEXT: popl %edi +; PIC-NEXT: movl {{[0-9]+}}(%esp), %esi +; PIC-NEXT: movl (_id-L1$pb)+4(%edi), %edx +; PIC-NEXT: movl _id-L1$pb(%edi), %eax +; PIC-NEXT: .p2align 4, 0x90 +; PIC-NEXT: LBB1_1: ## %atomicrmw.start +; PIC-NEXT: ## =>This Inner Loop Header: Depth=1 +; PIC-NEXT: movl %eax, %ebx +; PIC-NEXT: addl $1, %ebx +; PIC-NEXT: movl %edx, %ecx +; PIC-NEXT: adcl $0, %ecx +; PIC-NEXT: lock cmpxchg8b _id-L1$pb(%edi) +; PIC-NEXT: jne LBB1_1 +; PIC-NEXT: ## BB#2: ## %atomicrmw.end +; PIC-NEXT: addl $1, %eax +; PIC-NEXT: adcl $0, %edx +; PIC-NEXT: movl %eax, (%esi) +; PIC-NEXT: movl %edx, 4(%esi) +; PIC-NEXT: popl %esi +; PIC-NEXT: popl %edi +; PIC-NEXT: popl %ebx +; PIC-NEXT: retl +; PIC-NEXT: +; PIC-NEXT: .zerofill __DATA,__bss,_id,8,3 ## @id %tmp1 = atomicrmw add i64* @id, i64 1 seq_cst %tmp2 = add i64 %tmp1, 1 %tmp3 = bitcast i8* %ptr to i64* diff --git a/test/CodeGen/X86/atomic128.ll b/test/CodeGen/X86/atomic128.ll index 1bf7bfbfa26..77bbdec826a 100644 --- a/test/CodeGen/X86/atomic128.ll +++ b/test/CodeGen/X86/atomic128.ll @@ -1,20 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s @var = global i128 0 -define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { -; CHECK-LABEL: val_compare_and_swap: ; Due to the scheduling right after isel for cmpxchg and given the ; machine scheduler and copy coalescer do not mess up with physical ; register live-ranges, we end up with a useless copy. 
-; -; CHECK: movq %rcx, [[TMP:%r[0-9a-z]+]] -; CHECK: movq %rsi, %rax -; CHECK: movq %r8, %rcx -; CHECK: movq [[TMP]], %rbx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) - +define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { +; CHECK-LABEL: val_compare_and_swap: +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi0: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi1: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rcx, %r9 +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: movq %r9, %rbx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire %val = extractvalue { i128, i1 } %pair, 0 ret i128 %val @@ -22,24 +28,31 @@ define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { define void @fetch_and_nand(i128* %p, i128 %bits) { ; CHECK-LABEL: fetch_and_nand: -; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] -; CHECK-DAG: movq (%rdi), %rax -; CHECK-DAG: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: movq %rdx, %rcx -; CHECK: andq [[INCHI]], %rcx -; CHECK: movq %rax, %rbx - ; INCLO equivalent comes in in %rsi, so it makes sense it stays there. -; CHECK: andq %rsi, %rbx -; CHECK: notq %rbx -; CHECK: notq %rcx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - -; CHECK: movq %rax, _var -; CHECK: movq %rdx, _var+8 +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi2: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi3: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %r8 +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: andq %r8, %rcx +; CHECK-NEXT: movq %rax, %rbx +; CHECK-NEXT: andq %rsi, %rbx +; CHECK-NEXT: notq %rbx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB1_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %val = atomicrmw nand i128* %p, i128 %bits release store i128 %val, i128* @var, align 16 ret void @@ -47,23 +60,29 @@ define void @fetch_and_nand(i128* %p, i128 %bits) { define void @fetch_and_or(i128* %p, i128 %bits) { ; CHECK-LABEL: fetch_and_or: -; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] -; CHECK-DAG: movq (%rdi), %rax -; CHECK-DAG: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: movq %rax, %rbx - ; INCLO equivalent comes in in %rsi, so it makes sense it stays there. 
-; CHECK: orq %rsi, %rbx -; CHECK: movq %rdx, %rcx -; CHECK: orq [[INCHI]], %rcx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - -; CHECK: movq %rax, _var -; CHECK: movq %rdx, _var+8 - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi4: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi5: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %r8 +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB2_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rbx +; CHECK-NEXT: orq %rsi, %rbx +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: orq %r8, %rcx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB2_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %val = atomicrmw or i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -71,23 +90,29 @@ define void @fetch_and_or(i128* %p, i128 %bits) { define void @fetch_and_add(i128* %p, i128 %bits) { ; CHECK-LABEL: fetch_and_add: -; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] -; CHECK-DAG: movq (%rdi), %rax -; CHECK-DAG: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: movq %rax, %rbx - ; INCLO equivalent comes in in %rsi, so it makes sense it stays there. -; CHECK: addq %rsi, %rbx -; CHECK: movq %rdx, %rcx -; CHECK: adcq [[INCHI]], %rcx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - -; CHECK: movq %rax, _var -; CHECK: movq %rdx, _var+8 - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi6: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi7: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %r8 +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB3_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rbx +; CHECK-NEXT: addq %rsi, %rbx +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: adcq %r8, %rcx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB3_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %val = atomicrmw add i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -95,23 +120,29 @@ define void @fetch_and_add(i128* %p, i128 %bits) { define void @fetch_and_sub(i128* %p, i128 %bits) { ; CHECK-LABEL: fetch_and_sub: -; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] -; CHECK-DAG: movq (%rdi), %rax -; CHECK-DAG: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: movq %rax, %rbx - ; INCLO equivalent comes in in %rsi, so it makes sense it stays there. 
-; CHECK: subq %rsi, %rbx -; CHECK: movq %rdx, %rcx -; CHECK: sbbq [[INCHI]], %rcx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - -; CHECK: movq %rax, _var -; CHECK: movq %rdx, _var+8 - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi8: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi9: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %r8 +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB4_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rbx +; CHECK-NEXT: subq %rsi, %rbx +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: sbbq %r8, %rcx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB4_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %val = atomicrmw sub i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -119,24 +150,35 @@ define void @fetch_and_sub(i128* %p, i128 %bits) { define void @fetch_and_min(i128* %p, i128 %bits) { ; CHECK-LABEL: fetch_and_min: -; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] -; CHECK-DAG: movq (%rdi), %rax -; CHECK-DAG: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq -; CHECK: sbbq -; CHECK: setg -; CHECK: cmovneq %rax, %rbx -; CHECK: movq [[INCHI]], %rcx -; CHECK: cmovneq %rdx, %rcx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - -; CHECK: movq %rax, _var -; CHECK: movq %rdx, _var+8 - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi10: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi11: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %r8 +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB5_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpq %rax, %rsi +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: sbbq %rdx, %rcx +; CHECK-NEXT: setge %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovneq %rax, %rbx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: cmovneq %rdx, %rcx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB5_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %val = atomicrmw min i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -144,24 +186,35 @@ define void @fetch_and_min(i128* %p, i128 %bits) { define void @fetch_and_max(i128* %p, i128 %bits) { ; CHECK-LABEL: fetch_and_max: -; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] -; CHECK-DAG: movq (%rdi), %rax -; CHECK-DAG: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq -; CHECK: sbbq -; CHECK: setge -; CHECK: cmovneq %rax, %rbx -; CHECK: movq [[INCHI]], %rcx -; CHECK: cmovneq %rdx, %rcx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - -; CHECK: movq %rax, _var -; CHECK: movq %rdx, _var+8 - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi12: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi13: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %r8 +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB6_1: ## %atomicrmw.start +; 
CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpq %rsi, %rax +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: sbbq %r8, %rcx +; CHECK-NEXT: setge %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovneq %rax, %rbx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: cmovneq %rdx, %rcx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB6_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %val = atomicrmw max i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -169,24 +222,35 @@ define void @fetch_and_max(i128* %p, i128 %bits) { define void @fetch_and_umin(i128* %p, i128 %bits) { ; CHECK-LABEL: fetch_and_umin: -; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] -; CHECK-DAG: movq (%rdi), %rax -; CHECK-DAG: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq -; CHECK: sbbq -; CHECK: seta -; CHECK: cmovneq %rax, %rbx -; CHECK: movq [[INCHI]], %rcx -; CHECK: cmovneq %rdx, %rcx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - -; CHECK: movq %rax, _var -; CHECK: movq %rdx, _var+8 - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi14: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi15: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %r8 +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB7_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpq %rax, %rsi +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: sbbq %rdx, %rcx +; CHECK-NEXT: setae %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovneq %rax, %rbx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: cmovneq %rdx, %rcx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB7_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %val = atomicrmw umin i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -194,24 +258,35 @@ define void @fetch_and_umin(i128* %p, i128 %bits) { define void @fetch_and_umax(i128* %p, i128 %bits) { ; CHECK-LABEL: fetch_and_umax: -; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] -; CHECK-DAG: movq (%rdi), %rax -; CHECK-DAG: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq -; CHECK: sbbq -; CHECK: setb -; CHECK: cmovneq %rax, %rbx -; CHECK: movq [[INCHI]], %rcx -; CHECK: cmovneq %rdx, %rcx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - -; CHECK: movq %rax, _var -; CHECK: movq %rdx, _var+8 - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi16: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi17: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %r8 +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB8_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpq %rax, %rsi +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: sbbq %rdx, %rcx +; CHECK-NEXT: setb %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovneq %rax, %rbx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: cmovneq %rdx, %rcx +; CHECK-NEXT: 
lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB8_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %val = atomicrmw umax i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -219,75 +294,110 @@ define void @fetch_and_umax(i128* %p, i128 %bits) { define i128 @atomic_load_seq_cst(i128* %p) { ; CHECK-LABEL: atomic_load_seq_cst: -; CHECK: xorl %eax, %eax -; CHECK: xorl %edx, %edx -; CHECK: xorl %ecx, %ecx -; CHECK: xorl %ebx, %ebx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi18: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi19: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %r = load atomic i128, i128* %p seq_cst, align 16 ret i128 %r } define i128 @atomic_load_relaxed(i128* %p) { -; CHECK: atomic_load_relaxed: -; CHECK: xorl %eax, %eax -; CHECK: xorl %edx, %edx -; CHECK: xorl %ecx, %ecx -; CHECK: xorl %ebx, %ebx -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) - +; CHECK-LABEL: atomic_load_relaxed: +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi20: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi21: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %r = load atomic i128, i128* %p monotonic, align 16 ret i128 %r } define void @atomic_store_seq_cst(i128* %p, i128 %in) { ; CHECK-LABEL: atomic_store_seq_cst: -; CHECK: movq %rdx, %rcx -; CHECK: movq %rsi, %rbx -; CHECK: movq (%rdi), %rax -; CHECK: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] -; CHECK-NOT: callq ___sync_lock_test_and_set_16 - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi22: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi23: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB11_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB11_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq store atomic i128 %in, i128* %p seq_cst, align 16 ret void } define void @atomic_store_release(i128* %p, i128 %in) { ; CHECK-LABEL: atomic_store_release: -; CHECK: movq %rdx, %rcx -; CHECK: movq %rsi, %rbx -; CHECK: movq (%rdi), %rax -; CHECK: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi24: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi25: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB12_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB12_1 
+; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq store atomic i128 %in, i128* %p release, align 16 ret void } define void @atomic_store_relaxed(i128* %p, i128 %in) { ; CHECK-LABEL: atomic_store_relaxed: -; CHECK: movq %rdx, %rcx -; CHECK: movq %rsi, %rbx -; CHECK: movq (%rdi), %rax -; CHECK: movq 8(%rdi), %rdx - -; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: lock -; CHECK: cmpxchg16b (%rdi) -; CHECK: jne [[LOOP]] - +; CHECK: ## BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi26: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Lcfi27: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB13_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lock cmpxchg16b (%rdi) +; CHECK-NEXT: jne LBB13_1 +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq store atomic i128 %in, i128* %p unordered, align 16 ret void } diff --git a/test/CodeGen/X86/pr26350.ll b/test/CodeGen/X86/pr26350.ll index 6e87cb3e8b7..b626d9d2364 100644 --- a/test/CodeGen/X86/pr26350.ll +++ b/test/CodeGen/X86/pr26350.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -disable-constant-hoisting < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" target triple = "i386-unknown-linux-gnu" @@ -5,6 +6,18 @@ target triple = "i386-unknown-linux-gnu" @d = global i32 8, align 4 define i32 @main() { +; CHECK-LABEL: main: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl d, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $31, %ecx +; CHECK-NEXT: addl %eax, %eax +; CHECK-NEXT: andl $16, %eax +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: sbbl $0, %ecx +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retl entry: %load = load i32, i32* @d, align 4 %conv1 = zext i32 %load to i64 @@ -14,8 +27,3 @@ entry: %zext = zext i1 %cmp to i32 ret i32 %zext } -; CHECK: main: -; CHECK: movl d, %[[load:.*]] -; CHECK: movl %[[load]], %[[copy:.*]] -; CHECK: shrl $31, %[[copy]] -; CHECK: addl %[[load]], %[[load]] diff --git a/test/CodeGen/X86/pr29170.ll b/test/CodeGen/X86/pr29170.ll index d8e27557ab9..ecb4c978536 100644 --- a/test/CodeGen/X86/pr29170.ll +++ b/test/CodeGen/X86/pr29170.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" @@ -5,10 +6,26 @@ target triple = "i386-unknown-linux-gnu" @b = global i16 0, align 4 -; CHECK-LABEL: @main -; CHECK: cmpl -; CHECK: sbbl define i32 @main() { +; CHECK-LABEL: main: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: # BB#1: # %go +; CHECK-NEXT: movl $-1, %ecx +; CHECK-NEXT: movsbl b, %edx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: movzwl %dx, %edx +; CHECK-NEXT: cmpl $-1, %edx +; CHECK-NEXT: sbbl %ecx, %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # BB#2: # %if.then +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_3: # %if.else +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl entry: %true = icmp eq i32 0, 0 %const = bitcast i64 -4294967296 to i64 diff --git a/test/CodeGen/X86/setcc-lowering.ll b/test/CodeGen/X86/setcc-lowering.ll index a5ff2775802..391f1cc9fb4 100644 --- 
a/test/CodeGen/X86/setcc-lowering.ll +++ b/test/CodeGen/X86/setcc-lowering.ll @@ -20,6 +20,19 @@ define <8 x i16> @pr25080(<8 x i32> %a) { ; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; +; KNL-32-LABEL: pr25080: +; KNL-32: # BB#0: # %entry +; KNL-32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1 +; KNL-32-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-32-NEXT: vpxor %ymm1, %ymm1, %ymm1 +; KNL-32-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; KNL-32-NEXT: movb $15, %al +; KNL-32-NEXT: kmovw %eax, %k1 +; KNL-32-NEXT: korw %k1, %k0, %k1 +; KNL-32-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-32-NEXT: vpmovqw %zmm0, %xmm0 +; KNL-32-NEXT: retl entry: %0 = trunc <8 x i32> %a to <8 x i23> %1 = icmp eq <8 x i23> %0, zeroinitializer @@ -29,6 +42,18 @@ entry: } define void @pr26232(i64 %a) { +; AVX-LABEL: pr26232: +; AVX: # BB#0: # %for_loop599.preheader +; AVX-NEXT: .p2align 4, 0x90 +; AVX-NEXT: .LBB1_1: # %for_loop599 +; AVX-NEXT: # =>This Inner Loop Header: Depth=1 +; AVX-NEXT: cmpq $65536, %rdi # imm = 0x10000 +; AVX-NEXT: setl -{{[0-9]+}}(%rsp) +; AVX-NEXT: cmpw $0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: jne .LBB1_1 +; AVX-NEXT: # BB#2: # %for_exit600 +; AVX-NEXT: retq +; ; KNL-32-LABEL: pr26232: ; KNL-32: # BB#0: # %for_loop599.preheader ; KNL-32-NEXT: pushl %esi diff --git a/test/CodeGen/X86/sext-i1.ll b/test/CodeGen/X86/sext-i1.ll index 9b86cd0c9a2..c10667dce70 100644 --- a/test/CodeGen/X86/sext-i1.ll +++ b/test/CodeGen/X86/sext-i1.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i386-unknown-unknown -disable-cgp-branch-opts | FileCheck %s --check-prefix=CHECK --check-prefix=X32 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-cgp-branch-opts | FileCheck %s --check-prefix=CHECK --check-prefix=X64 @@ -6,24 +6,34 @@ ; PR6146 define i32 @t1(i32 %x) nounwind readnone ssp { -; CHECK-LABEL: t1: -; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: ret +; X32-LABEL: t1: +; X32: # BB#0: +; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp) +; X32-NEXT: sbbl %eax, %eax +; X32-NEXT: retl ; +; X64-LABEL: t1: +; X64: # BB#0: +; X64-NEXT: cmpl $1, %edi +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: retq %t0 = icmp eq i32 %x, 0 %if = select i1 %t0, i32 -1, i32 0 ret i32 %if } define i32 @t2(i32 %x) nounwind readnone ssp { -; CHECK-LABEL: t2: -; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: ret +; X32-LABEL: t2: +; X32: # BB#0: +; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp) +; X32-NEXT: sbbl %eax, %eax +; X32-NEXT: retl ; +; X64-LABEL: t2: +; X64: # BB#0: +; X64-NEXT: cmpl $1, %edi +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: retq %t0 = icmp eq i32 %x, 0 %if = sext i1 %t0 to i32 ret i32 %if @@ -46,7 +56,6 @@ define i32 @t3() nounwind readonly { ; X64-NEXT: cmpq %rax, %rax ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq -; entry: %not.tobool = icmp eq i32 undef, 0 %cond = sext i1 %not.tobool to i32 @@ -80,7 +89,6 @@ define i32 @t4(i64 %x) nounwind readnone ssp { ; X64-NEXT: cmpq $1, %rdi ; X64-NEXT: sbbl %eax, %eax ; X64-NEXT: retq -; %t0 = icmp eq i64 %x, 0 %t1 = sext i1 %t0 to i32 ret i32 %t1 @@ -99,7 +107,6 @@ define i64 @t5(i32 %x) nounwind readnone ssp { ; X64-NEXT: cmpl $1, %edi ; X64-NEXT: sbbq %rax, %rax ; X64-NEXT: retq -; %t0 = icmp eq i32 %x, 0 %t1 = sext i1 %t0 to i64 ret i64 %t1 diff --git a/test/CodeGen/X86/wide-integer-cmp.ll b/test/CodeGen/X86/wide-integer-cmp.ll index 
c45a0541e6a..fbaf500e833 100644 --- a/test/CodeGen/X86/wide-integer-cmp.ll +++ b/test/CodeGen/X86/wide-integer-cmp.ll @@ -1,7 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i686-linux-gnu %s -o - | FileCheck %s define i32 @branch_eq(i64 %a, i64 %b) { +; CHECK-LABEL: branch_eq: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # BB#1: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_2: # %bb2 +; CHECK-NEXT: movl $2, %eax +; CHECK-NEXT: retl entry: %cmp = icmp eq i64 %a, %b br i1 %cmp, label %bb1, label %bb2 @@ -9,22 +24,22 @@ bb1: ret i32 1 bb2: ret i32 2 - -; CHECK-LABEL: branch_eq: -; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]] -; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]] -; CHECK: xorl 16(%esp), [[LHSHi]] -; CHECK: xorl 12(%esp), [[LHSLo]] -; CHECK: orl [[LHSHi]], [[LHSLo]] -; CHECK: jne [[FALSE:.LBB[0-9_]+]] -; CHECK: movl $1, %eax -; CHECK: retl -; CHECK: [[FALSE]]: -; CHECK: movl $2, %eax -; CHECK: retl } define i32 @branch_slt(i64 %a, i64 %b) { +; CHECK-LABEL: branch_slt: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: jge .LBB1_2 +; CHECK-NEXT: # BB#1: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB1_2: # %bb2 +; CHECK-NEXT: movl $2, %eax +; CHECK-NEXT: retl entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 @@ -32,21 +47,22 @@ bb1: ret i32 1 bb2: ret i32 2 - -; CHECK-LABEL: branch_slt: -; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]] -; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]] -; CHECK: cmpl 12(%esp), [[LHSLo]] -; CHECK: sbbl 16(%esp), [[LHSHi]] -; CHECK: jge [[FALSE:.LBB[0-9_]+]] -; CHECK: movl $1, %eax -; CHECK: retl -; CHECK: [[FALSE]]: -; CHECK: movl $2, %eax -; CHECK: retl } define i32 @branch_ule(i64 %a, i64 %b) { +; CHECK-LABEL: branch_ule: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: jb .LBB2_2 +; CHECK-NEXT: # BB#1: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_2: # %bb2 +; CHECK-NEXT: movl $2, %eax +; CHECK-NEXT: retl entry: %cmp = icmp ule i64 %a, %b br i1 %cmp, label %bb1, label %bb2 @@ -54,36 +70,49 @@ bb1: ret i32 1 bb2: ret i32 2 - -; CHECK-LABEL: branch_ule: -; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]] -; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]] -; CHECK: cmpl 4(%esp), [[RHSLo]] -; CHECK: sbbl 8(%esp), [[RHSHi]] -; CHECK: jb [[FALSE:.LBB[0-9_]+]] -; CHECK: movl $1, %eax -; CHECK: retl -; CHECK: [[FALSE]]: -; CHECK: movl $2, %eax -; CHECK: retl } define i32 @set_gt(i64 %a, i64 %b) { +; CHECK-LABEL: set_gt: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: setl %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retl entry: %cmp = icmp sgt i64 %a, %b %res = select i1 %cmp, i32 1, i32 0 ret i32 %res - -; CHECK-LABEL: set_gt: -; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]] -; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]] -; CHECK: 
cmpl 4(%esp), [[RHSLo]] -; CHECK: sbbl 8(%esp), [[RHSHi]] -; CHECK: setl %al -; CHECK: retl } define i32 @test_wide(i128 %a, i128 %b) { +; CHECK-LABEL: test_wide: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .Lcfi0: +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .Lcfi1: +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: jge .LBB4_2 +; CHECK-NEXT: # BB#1: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB4_2: # %bb2 +; CHECK-NEXT: movl $2, %eax +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: %cmp = icmp slt i128 %a, %b br i1 %cmp, label %bb1, label %bb2 @@ -91,21 +120,22 @@ bb1: ret i32 1 bb2: ret i32 2 - -; CHECK-LABEL: test_wide: -; CHECK: cmpl 24(%esp) -; CHECK: sbbl 28(%esp) -; CHECK: sbbl 32(%esp) -; CHECK: sbbl 36(%esp) -; CHECK: jge [[FALSE:.LBB[0-9_]+]] -; CHECK: movl $1, %eax -; CHECK: retl -; CHECK: [[FALSE]]: -; CHECK: movl $2, %eax -; CHECK: retl } +; The comparison of the low bits will be folded to a CARRY_FALSE node. Make +; sure the code can handle that. define i32 @test_carry_false(i64 %a, i64 %b) { +; CHECK-LABEL: test_carry_false: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: jge .LBB5_2 +; CHECK-NEXT: # BB#1: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB5_2: # %bb2 +; CHECK-NEXT: movl $2, %eax +; CHECK-NEXT: retl entry: %x = and i64 %a, -4294967296 ;0xffffffff00000000 %y = and i64 %b, -4294967296 @@ -115,16 +145,4 @@ bb1: ret i32 1 bb2: ret i32 2 - -; The comparison of the low bits will be folded to a CARRY_FALSE node. Make -; sure the code can handle that. -; CHECK-LABEL: carry_false: -; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]] -; CHECK: cmpl 16(%esp), [[LHSHi]] -; CHECK: jge [[FALSE:.LBB[0-9_]+]] -; CHECK: movl $1, %eax -; CHECK: retl -; CHECK: [[FALSE]]: -; CHECK: movl $2, %eax -; CHECK: retl }
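
For reference, the common shape these regenerated assertions pin down is the compare-exchange retry loop the X86 backend emits when an atomicrmw has no direct lowering: on i386, 64-bit read-modify-write operations expand to a lock cmpxchg8b loop (expected value in %edx:%eax, replacement in %ecx:%ebx), and on x86-64, 128-bit operations expand to the analogous lock cmpxchg16b loop on %rdx:%rax and %rcx:%rbx. A minimal sketch of the kind of input involved, reduced from atomic-minmax-i6432.ll above (the function name and the stripped-down RUN line are illustrative only, not part of this commit):

; RUN: llc -march=x86 -mattr=+cmov,cx16 -mtriple=i386-pc-linux -verify-machineinstrs < %s
@v = external global i64

define i64 @max_acquire_sketch(i64 %x) {
  ; The backend expands this to a loop that loads %edx:%eax from @v,
  ; selects the maximum into %ecx:%ebx with cmp/sbb/cmov (as in the
  ; LINUX/PIC bodies above), then retries lock cmpxchg8b until the
  ; exchange succeeds.
  %old = atomicrmw max i64* @v, i64 %x acquire
  ret i64 %old
}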
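
Assertions in this form come from utils/update_llc_test_checks.py (see the NOTE line each updated file now carries); rerunning the script over a test file, with it invoking a freshly built llc, regenerates the full-body CHECK lines, which is why the hand-written pattern-style checks above were replaced wholesale rather than patched individually.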