From ace71433ab78b1ff39b372eedca25f35a5095efc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 14 Mar 2017 14:03:16 +0000 Subject: [PATCH] [X86] Add extra BITREVERSE tests Test on 32-bit and 64-bit targets. Add bitreverse tests for i64, i32 and i16 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297741 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/bitreverse.ll | 573 +++++++++++++++++++++++++-------- 1 file changed, 442 insertions(+), 131 deletions(-) diff --git a/test/CodeGen/X86/bitreverse.ll b/test/CodeGen/X86/bitreverse.ll index 35cbbdafb46..06daf014c15 100644 --- a/test/CodeGen/X86/bitreverse.ll +++ b/test/CodeGen/X86/bitreverse.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 ; These tests just check that the plumbing is in place for @llvm.bitreverse. The ; actual output is massive at the moment as llvm.bitreverse is not yet legal. 
@@ -7,100 +8,354 @@ declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind { -; CHECK-LABEL: test_bitreverse_v2i16: -; CHECK: # BB#0: -; CHECK-NEXT: movw {{[0-9]+}}(%esp), %cx -; CHECK-NEXT: movw {{[0-9]+}}(%esp), %ax -; CHECK-NEXT: rolw $8, %ax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: andl $3855, %edx # imm = 0xF0F -; CHECK-NEXT: shll $4, %edx -; CHECK-NEXT: andl $61680, %eax # imm = 0xF0F0 -; CHECK-NEXT: shrl $4, %eax -; CHECK-NEXT: orl %edx, %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: andl $13107, %edx # imm = 0x3333 -; CHECK-NEXT: andl $52428, %eax # imm = 0xCCCC -; CHECK-NEXT: shrl $2, %eax -; CHECK-NEXT: leal (%eax,%edx,4), %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: andl $21845, %edx # imm = 0x5555 -; CHECK-NEXT: andl $43690, %eax # imm = 0xAAAA -; CHECK-NEXT: shrl %eax -; CHECK-NEXT: leal (%eax,%edx,2), %eax -; CHECK-NEXT: rolw $8, %cx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: andl $3855, %edx # imm = 0xF0F -; CHECK-NEXT: shll $4, %edx -; CHECK-NEXT: andl $61680, %ecx # imm = 0xF0F0 -; CHECK-NEXT: shrl $4, %ecx -; CHECK-NEXT: orl %edx, %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: andl $13107, %edx # imm = 0x3333 -; CHECK-NEXT: andl $52428, %ecx # imm = 0xCCCC -; CHECK-NEXT: shrl $2, %ecx -; CHECK-NEXT: leal (%ecx,%edx,4), %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: andl $21845, %edx # imm = 0x5555 -; CHECK-NEXT: andl $43690, %ecx # imm = 0xAAAA -; CHECK-NEXT: shrl %ecx -; CHECK-NEXT: leal (%ecx,%edx,2), %edx -; CHECK-NEXT: # kill: %AX %AX %EAX -; CHECK-NEXT: # kill: %DX %DX %EDX -; CHECK-NEXT: retl +; X86-LABEL: test_bitreverse_v2i16: +; X86: # BB#0: +; X86-NEXT: movw {{[0-9]+}}(%esp), %cx +; X86-NEXT: movw {{[0-9]+}}(%esp), %ax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $3855, %edx # imm = 0xF0F +; X86-NEXT: shll $4, %edx +; X86-NEXT: andl $61680, %eax # imm = 0xF0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %edx, %eax +; 
X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $13107, %edx # imm = 0x3333 +; X86-NEXT: andl $52428, %eax # imm = 0xCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%edx,4), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $21845, %edx # imm = 0x5555 +; X86-NEXT: andl $43690, %eax # imm = 0xAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%edx,2), %eax +; X86-NEXT: rolw $8, %cx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl $3855, %edx # imm = 0xF0F +; X86-NEXT: shll $4, %edx +; X86-NEXT: andl $61680, %ecx # imm = 0xF0F0 +; X86-NEXT: shrl $4, %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl $13107, %edx # imm = 0x3333 +; X86-NEXT: andl $52428, %ecx # imm = 0xCCCC +; X86-NEXT: shrl $2, %ecx +; X86-NEXT: leal (%ecx,%edx,4), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl $21845, %edx # imm = 0x5555 +; X86-NEXT: andl $43690, %ecx # imm = 0xAAAA +; X86-NEXT: shrl %ecx +; X86-NEXT: leal (%ecx,%edx,2), %edx +; X86-NEXT: # kill: %AX %AX %EAX +; X86-NEXT: # kill: %DX %DX %EDX +; X86-NEXT: retl +; +; X64-LABEL: test_bitreverse_v2i16: +; X64: # BB#0: +; X64-NEXT: pxor %xmm1, %xmm1 +; X64-NEXT: movdqa %xmm0, %xmm2 +; X64-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] +; X64-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7] +; X64-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4] +; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] +; X64-NEXT: packuswb %xmm2, %xmm0 +; X64-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; X64-NEXT: movdqa %xmm0, 
%xmm2 +; X64-NEXT: pand %xmm1, %xmm2 +; X64-NEXT: psllw $4, %xmm2 +; X64-NEXT: movdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; X64-NEXT: pand %xmm3, %xmm2 +; X64-NEXT: pand %xmm3, %xmm0 +; X64-NEXT: psrlw $4, %xmm0 +; X64-NEXT: pand %xmm1, %xmm0 +; X64-NEXT: por %xmm2, %xmm0 +; X64-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; X64-NEXT: pand %xmm0, %xmm1 +; X64-NEXT: psllw $2, %xmm1 +; X64-NEXT: pand {{.*}}(%rip), %xmm1 +; X64-NEXT: pand {{.*}}(%rip), %xmm0 +; X64-NEXT: psrlw $2, %xmm0 +; X64-NEXT: pand {{.*}}(%rip), %xmm0 +; X64-NEXT: por %xmm1, %xmm0 +; X64-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85] +; X64-NEXT: pand %xmm0, %xmm1 +; X64-NEXT: paddb %xmm1, %xmm1 +; X64-NEXT: pand {{.*}}(%rip), %xmm0 +; X64-NEXT: psrlw $1, %xmm0 +; X64-NEXT: pand {{.*}}(%rip), %xmm0 +; X64-NEXT: por %xmm1, %xmm0 +; X64-NEXT: psrlq $48, %xmm0 +; X64-NEXT: retq %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a) ret <2 x i16> %b } +declare i64 @llvm.bitreverse.i64(i64) readnone + +define i64 @test_bitreverse_i64(i64 %a) nounwind { +; X86-LABEL: test_bitreverse_i64: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %edx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%edx,4), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%edx,2), %eax +; X86-NEXT: bswapl %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: 
andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %edx +; X86-NEXT: andl $-252645136, %ecx # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %ecx # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %ecx +; X86-NEXT: leal (%ecx,%edx,4), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %ecx # imm = 0xAAAAAAAA +; X86-NEXT: shrl %ecx +; X86-NEXT: leal (%ecx,%edx,2), %edx +; X86-NEXT: retl +; +; X64-LABEL: test_bitreverse_i64: +; X64: # BB#0: +; X64-NEXT: bswapq %rdi +; X64-NEXT: movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F +; X64-NEXT: andq %rdi, %rax +; X64-NEXT: shlq $4, %rax +; X64-NEXT: movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0 +; X64-NEXT: andq %rdi, %rcx +; X64-NEXT: shrq $4, %rcx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; X64-NEXT: andq %rcx, %rax +; X64-NEXT: movabsq $-3689348814741910324, %rdx # imm = 0xCCCCCCCCCCCCCCCC +; X64-NEXT: andq %rcx, %rdx +; X64-NEXT: shrq $2, %rdx +; X64-NEXT: leaq (%rdx,%rax,4), %rax +; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA +; X64-NEXT: andq %rax, %rdx +; X64-NEXT: shrq %rdx +; X64-NEXT: leaq (%rdx,%rcx,2), %rax +; X64-NEXT: retq + %b = call i64 @llvm.bitreverse.i64(i64 %a) + ret i64 %b +} + +declare i32 @llvm.bitreverse.i32(i32) readnone + +define i32 @test_bitreverse_i32(i32 %a) nounwind { +; X86-LABEL: test_bitreverse_i32: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, 
%eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: retl +; +; X64-LABEL: test_bitreverse_i32: +; X64: # BB#0: +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: bswapl %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X64-NEXT: shll $4, %eax +; X64-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 +; X64-NEXT: shrl $4, %edi +; X64-NEXT: orl %eax, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X64-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC +; X64-NEXT: shrl $2, %edi +; X64-NEXT: leal (%rdi,%rax,4), %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X64-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X64-NEXT: shrl %eax +; X64-NEXT: leal (%rax,%rcx,2), %eax +; X64-NEXT: retq + %b = call i32 @llvm.bitreverse.i32(i32 %a) + ret i32 %b +} + declare i24 @llvm.bitreverse.i24(i24) readnone define i24 @test_bitreverse_i24(i24 %a) nounwind { -; CHECK-LABEL: test_bitreverse_i24: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: bswapl %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F -; CHECK-NEXT: shll $4, %ecx -; CHECK-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 -; CHECK-NEXT: shrl $4, %eax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; CHECK-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC -; CHECK-NEXT: shrl $2, %eax -; CHECK-NEXT: leal (%eax,%ecx,4), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andl 
$1431655680, %ecx # imm = 0x55555500 -; CHECK-NEXT: andl $-1431655936, %eax # imm = 0xAAAAAA00 -; CHECK-NEXT: shrl %eax -; CHECK-NEXT: leal (%eax,%ecx,2), %eax -; CHECK-NEXT: shrl $8, %eax -; CHECK-NEXT: retl +; X86-LABEL: test_bitreverse_i24: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655680, %ecx # imm = 0x55555500 +; X86-NEXT: andl $-1431655936, %eax # imm = 0xAAAAAA00 +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: shrl $8, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_bitreverse_i24: +; X64: # BB#0: +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: bswapl %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X64-NEXT: shll $4, %eax +; X64-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 +; X64-NEXT: shrl $4, %edi +; X64-NEXT: orl %eax, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X64-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC +; X64-NEXT: shrl $2, %edi +; X64-NEXT: leal (%rdi,%rax,4), %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl $1431655680, %ecx # imm = 0x55555500 +; X64-NEXT: andl $-1431655936, %eax # imm = 0xAAAAAA00 +; X64-NEXT: shrl %eax +; X64-NEXT: leal (%rax,%rcx,2), %eax +; X64-NEXT: shrl $8, %eax +; X64-NEXT: retq %b = call i24 @llvm.bitreverse.i24(i24 %a) ret i24 %b } +declare i16 @llvm.bitreverse.i16(i16) readnone + +define i16 @test_bitreverse_i16(i16 %a) nounwind { +; X86-LABEL: test_bitreverse_i16: +; X86: # BB#0: +; X86-NEXT: 
movw {{[0-9]+}}(%esp), %ax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $3855, %ecx # imm = 0xF0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $61680, %eax # imm = 0xF0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $13107, %ecx # imm = 0x3333 +; X86-NEXT: andl $52428, %eax # imm = 0xCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $21845, %ecx # imm = 0x5555 +; X86-NEXT: andl $43690, %eax # imm = 0xAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: # kill: %AX %AX %EAX +; X86-NEXT: retl +; +; X64-LABEL: test_bitreverse_i16: +; X64: # BB#0: +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: rolw $8, %di +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $3855, %eax # imm = 0xF0F +; X64-NEXT: shll $4, %eax +; X64-NEXT: andl $61680, %edi # imm = 0xF0F0 +; X64-NEXT: shrl $4, %edi +; X64-NEXT: orl %eax, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $13107, %eax # imm = 0x3333 +; X64-NEXT: andl $52428, %edi # imm = 0xCCCC +; X64-NEXT: shrl $2, %edi +; X64-NEXT: leal (%rdi,%rax,4), %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl $21845, %ecx # imm = 0x5555 +; X64-NEXT: andl $43690, %eax # imm = 0xAAAA +; X64-NEXT: shrl %eax +; X64-NEXT: leal (%rax,%rcx,2), %eax +; X64-NEXT: # kill: %AX %AX %EAX +; X64-NEXT: retq + %b = call i16 @llvm.bitreverse.i16(i16 %a) + ret i16 %b +} + declare i8 @llvm.bitreverse.i8(i8) readnone define i8 @test_bitreverse_i8(i8 %a) { -; CHECK-LABEL: test_bitreverse_i8: -; CHECK: # BB#0: -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al -; CHECK-NEXT: rolb $4, %al -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andb $51, %cl -; CHECK-NEXT: shlb $2, %cl -; CHECK-NEXT: andb $-52, %al -; CHECK-NEXT: shrb $2, %al -; CHECK-NEXT: orb %cl, %al -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andb $85, %cl -; CHECK-NEXT: addb %cl, %cl -; CHECK-NEXT: andb $-86, %al -; CHECK-NEXT: shrb %al 
-; CHECK-NEXT: orb %cl, %al -; CHECK-NEXT: retl +; X86-LABEL: test_bitreverse_i8: +; X86: # BB#0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: rolb $4, %al +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andb $51, %cl +; X86-NEXT: shlb $2, %cl +; X86-NEXT: andb $-52, %al +; X86-NEXT: shrb $2, %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andb $85, %cl +; X86-NEXT: addb %cl, %cl +; X86-NEXT: andb $-86, %al +; X86-NEXT: shrb %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: test_bitreverse_i8: +; X64: # BB#0: +; X64-NEXT: rolb $4, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $51, %al +; X64-NEXT: shlb $2, %al +; X64-NEXT: andb $-52, %dil +; X64-NEXT: shrb $2, %dil +; X64-NEXT: orb %al, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $85, %al +; X64-NEXT: addb %al, %al +; X64-NEXT: andb $-86, %dil +; X64-NEXT: shrb %dil +; X64-NEXT: orb %al, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b } @@ -108,24 +363,43 @@ define i8 @test_bitreverse_i8(i8 %a) { declare i4 @llvm.bitreverse.i4(i4) readnone define i4 @test_bitreverse_i4(i4 %a) { -; CHECK-LABEL: test_bitreverse_i4: -; CHECK: # BB#0: -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al -; CHECK-NEXT: rolb $4, %al -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andb $51, %cl -; CHECK-NEXT: shlb $2, %cl -; CHECK-NEXT: andb $-52, %al -; CHECK-NEXT: shrb $2, %al -; CHECK-NEXT: orb %cl, %al -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andb $80, %cl -; CHECK-NEXT: addb %cl, %cl -; CHECK-NEXT: andb $-96, %al -; CHECK-NEXT: shrb %al -; CHECK-NEXT: orb %cl, %al -; CHECK-NEXT: shrb $4, %al -; CHECK-NEXT: retl +; X86-LABEL: test_bitreverse_i4: +; X86: # BB#0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: rolb $4, %al +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andb $51, %cl +; X86-NEXT: shlb $2, %cl +; X86-NEXT: andb $-52, %al +; X86-NEXT: shrb $2, %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andb $80, 
%cl +; X86-NEXT: addb %cl, %cl +; X86-NEXT: andb $-96, %al +; X86-NEXT: shrb %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: shrb $4, %al +; X86-NEXT: retl +; +; X64-LABEL: test_bitreverse_i4: +; X64: # BB#0: +; X64-NEXT: rolb $4, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $51, %al +; X64-NEXT: shlb $2, %al +; X64-NEXT: andb $-52, %dil +; X64-NEXT: shrb $2, %dil +; X64-NEXT: orb %al, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $80, %al +; X64-NEXT: addb %al, %al +; X64-NEXT: andb $-96, %dil +; X64-NEXT: shrb %dil +; X64-NEXT: orb %al, %dil +; X64-NEXT: shrb $4, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 %a) ret i4 %b } @@ -133,38 +407,58 @@ define i4 @test_bitreverse_i4(i4 %a) { ; These tests check that bitreverse(constant) calls are folded define <2 x i16> @fold_v2i16() { -; CHECK-LABEL: fold_v2i16: -; CHECK: # BB#0: -; CHECK-NEXT: movw $-4096, %ax # imm = 0xF000 -; CHECK-NEXT: movw $240, %dx -; CHECK-NEXT: retl +; X86-LABEL: fold_v2i16: +; X86: # BB#0: +; X86-NEXT: movw $-4096, %ax # imm = 0xF000 +; X86-NEXT: movw $240, %dx +; X86-NEXT: retl +; +; X64-LABEL: fold_v2i16: +; X64: # BB#0: +; X64-NEXT: movaps {{.*#+}} xmm0 = [61440,240] +; X64-NEXT: retq %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>) ret <2 x i16> %b } define i24 @fold_i24() { -; CHECK-LABEL: fold_i24: -; CHECK: # BB#0: -; CHECK-NEXT: movl $2048, %eax # imm = 0x800 -; CHECK-NEXT: retl +; X86-LABEL: fold_i24: +; X86: # BB#0: +; X86-NEXT: movl $2048, %eax # imm = 0x800 +; X86-NEXT: retl +; +; X64-LABEL: fold_i24: +; X64: # BB#0: +; X64-NEXT: movl $2048, %eax # imm = 0x800 +; X64-NEXT: retq %b = call i24 @llvm.bitreverse.i24(i24 4096) ret i24 %b } define i8 @fold_i8() { -; CHECK-LABEL: fold_i8: -; CHECK: # BB#0: -; CHECK-NEXT: movb $-16, %al -; CHECK-NEXT: retl +; X86-LABEL: fold_i8: +; X86: # BB#0: +; X86-NEXT: movb $-16, %al +; X86-NEXT: retl +; +; X64-LABEL: fold_i8: +; X64: # BB#0: +; X64-NEXT: movb $-16, %al +; X64-NEXT: retq %b = 
call i8 @llvm.bitreverse.i8(i8 15) ret i8 %b } define i4 @fold_i4() { -; CHECK-LABEL: fold_i4: -; CHECK: # BB#0: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: retl +; X86-LABEL: fold_i4: +; X86: # BB#0: +; X86-NEXT: movb $1, %al +; X86-NEXT: retl +; +; X64-LABEL: fold_i4: +; X64: # BB#0: +; X64-NEXT: movb $1, %al +; X64-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 8) ret i4 %b } @@ -172,21 +466,30 @@ define i4 @fold_i4() { ; These tests check that bitreverse(bitreverse()) calls are removed define i8 @identity_i8(i8 %a) { -; CHECK-LABEL: identity_i8: -; CHECK: # BB#0: -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al -; CHECK-NEXT: retl +; X86-LABEL: identity_i8: +; X86: # BB#0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: retl +; +; X64-LABEL: identity_i8: +; X64: # BB#0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) %c = call i8 @llvm.bitreverse.i8(i8 %b) ret i8 %c } define <2 x i16> @identity_v2i16(<2 x i16> %a) { -; CHECK-LABEL: identity_v2i16: -; CHECK: # BB#0: -; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: retl +; X86-LABEL: identity_v2i16: +; X86: # BB#0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NEXT: retl +; +; X64-LABEL: identity_v2i16: +; X64: # BB#0: +; X64-NEXT: retq %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a) %c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b) ret <2 x i16> %c @@ -195,17 +498,25 @@ define <2 x i16> @identity_v2i16(<2 x i16> %a) { ; These tests check that bitreverse(undef) calls are removed define i8 @undef_i8() { -; CHECK-LABEL: undef_i8: -; CHECK: # BB#0: -; CHECK-NEXT: retl +; X86-LABEL: undef_i8: +; X86: # BB#0: +; X86-NEXT: retl +; +; X64-LABEL: undef_i8: +; X64: # BB#0: +; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 undef) ret i8 %b } define <2 x i16> @undef_v2i16() { -; CHECK-LABEL: undef_v2i16: -; CHECK: # BB#0: -; CHECK-NEXT: retl +; X86-LABEL: undef_v2i16: 
+; X86: # BB#0: +; X86-NEXT: retl +; +; X64-LABEL: undef_v2i16: +; X64: # BB#0: +; X64-NEXT: retq %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef) ret <2 x i16> %b } -- 2.50.1