From: Amaury Sechet Date: Mon, 6 Feb 2017 16:21:41 +0000 (+0000) Subject: Commit full codegen for mul-i256.ll . NFC X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=86052d835698fa5b75463168b09d5a164aceb7ee;p=llvm Commit full codegen for mul-i256.ll . NFC The full codegen is committed for larger multiplies, so this won't make the test suite more fragile. However, it will expose the effects of various DAG combines. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294196 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/mul-i256.ll b/test/CodeGen/X86/mul-i256.ll index 8f207b8dd08..bb2989b9298 100644 --- a/test/CodeGen/X86/mul-i256.ll +++ b/test/CodeGen/X86/mul-i256.ll @@ -1,8 +1,284 @@ -; RUN: llc < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-unknown | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @test(i256* %a, i256* %b, i256* %out) #0 { +; X32-LABEL: test: +; X32: # BB#0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: .Lcfi0: +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .Lcfi1: +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .Lcfi2: +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $168, %esp +; X32-NEXT: .Lcfi3: +; X32-NEXT: .cfi_offset %esi, -20 +; X32-NEXT: .Lcfi4: +; X32-NEXT: .cfi_offset %edi, -16 +; X32-NEXT: .Lcfi5: +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movl 16(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 20(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 24(%eax), %ecx +; X32-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 28(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 8(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 12(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl (%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 4(%eax), %ebx +; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl 16(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 20(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 24(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 28(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl (%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 4(%eax), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 8(%eax), %esi +; X32-NEXT: movl 12(%eax), %edi +; X32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $0 +; X32-NEXT: pushl %edi +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: pushl %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $0 +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %eax +; X32-NEXT: calll __multi3 +; X32-NEXT: addl $32, %esp +; X32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $0 +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $0 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %eax +; X32-NEXT: calll __multi3 +; X32-NEXT: addl $32, %esp +; X32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X32-NEXT: pushl $0 +; X32-NEXT: pushl 
$0 +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $0 +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %eax +; X32-NEXT: calll __multi3 +; X32-NEXT: addl $32, %esp +; X32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X32-NEXT: pushl %esi +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %eax +; X32-NEXT: calll __multi3 +; X32-NEXT: addl $32, %esp +; X32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $0 +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $0 +; X32-NEXT: pushl %esi +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %eax +; X32-NEXT: calll __multi3 +; X32-NEXT: addl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: addl {{[0-9]+}}(%esp), %edx +; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi +; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %eax +; X32-NEXT: addl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl %eax, %ebx +; X32-NEXT: adcl $0, %edx +; X32-NEXT: sbbl %eax, %eax +; X32-NEXT: andl $1, %eax +; 
X32-NEXT: addl {{[0-9]+}}(%esp), %edi +; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %eax +; X32-NEXT: calll __multi3 +; X32-NEXT: addl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: addl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax +; X32-NEXT: addl %edi, %esi +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: movl 16(%ebp), %edi +; X32-NEXT: movl %ebx, 4(%edi) +; X32-NEXT: movl 16(%ebp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl %edi, (%ebx) +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl %edi, 8(%ebx) +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl %edi, 12(%ebx) +; X32-NEXT: movl %esi, 16(%ebx) +; X32-NEXT: movl %ecx, 20(%ebx) +; X32-NEXT: movl %edx, 24(%ebx) +; X32-NEXT: movl 
%eax, 28(%ebx) +; X32-NEXT: leal -12(%ebp), %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; X64-LABEL: test: +; X64: # BB#0: # %entry +; X64-NEXT: pushq %r15 +; X64-NEXT: .Lcfi0: +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %r14 +; X64-NEXT: .Lcfi1: +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: pushq %r12 +; X64-NEXT: .Lcfi2: +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: pushq %rbx +; X64-NEXT: .Lcfi3: +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: .Lcfi4: +; X64-NEXT: .cfi_offset %rbx, -40 +; X64-NEXT: .Lcfi5: +; X64-NEXT: .cfi_offset %r12, -32 +; X64-NEXT: .Lcfi6: +; X64-NEXT: .cfi_offset %r14, -24 +; X64-NEXT: .Lcfi7: +; X64-NEXT: .cfi_offset %r15, -16 +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq (%rdi), %r14 +; X64-NEXT: movq 8(%rdi), %r8 +; X64-NEXT: movq 16(%rdi), %rcx +; X64-NEXT: movq 16(%rsi), %rbx +; X64-NEXT: movq (%rsi), %r12 +; X64-NEXT: movq 8(%rsi), %r15 +; X64-NEXT: movq 24(%rdi), %rdi +; X64-NEXT: imulq %r12, %rdi +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rdi, %rdx +; X64-NEXT: imulq %r15, %rcx +; X64-NEXT: addq %rdx, %rcx +; X64-NEXT: movq %rbx, %rdi +; X64-NEXT: imulq %r8, %rdi +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: mulq %r14 +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: addq %rdi, %rdx +; X64-NEXT: movq 24(%rsi), %rbx +; X64-NEXT: imulq %r14, %rbx +; X64-NEXT: addq %rdx, %rbx +; X64-NEXT: addq %r9, %r11 +; X64-NEXT: adcq %rcx, %rbx +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %rsi, %rdi +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rdi, %r14 +; X64-NEXT: adcq $0, %rsi +; X64-NEXT: addq %rcx, %rsi +; 
X64-NEXT: sbbq %rcx, %rcx +; X64-NEXT: andl $1, %ecx +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %r15 +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: addq %r11, %rax +; X64-NEXT: adcq %rbx, %rdx +; X64-NEXT: movq %r9, (%r10) +; X64-NEXT: movq %r14, 8(%r10) +; X64-NEXT: movq %rax, 16(%r10) +; X64-NEXT: movq %rdx, 24(%r10) +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r12 +; X64-NEXT: popq %r14 +; X64-NEXT: popq %r15 +; X64-NEXT: retq entry: %av = load i256, i256* %a %bv = load i256, i256* %b @@ -11,22 +287,4 @@ entry: ret void } -; CHECK-LABEL: @test -; There is a lot of inter-register motion, and so matching the instruction -; sequence will be fragile. There should be 6 underlying multiplications. -; CHECK: imulq -; CHECK: mulq -; CHECK: imulq -; CHECK: imulq -; CHECK: mulq -; CHECK: imulq -; CHECK: mulq -; CHECK: mulq -; CHECK: mulq -; CHECK: mulq -; CHECK-NOT: imulq -; CHECK-NOT: mulq -; CHECK: retq - attributes #0 = { norecurse nounwind uwtable "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" } -