From: Craig Topper Date: Fri, 5 Apr 2019 06:32:50 +0000 (+0000) Subject: [X86] Promote i16 SRA instructions to i32 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=392bafc3f6b6ed8dc701b1320acbdc84844fbe32;p=llvm [X86] Promote i16 SRA instructions to i32 We already promote SRL and SHL to i32. This will introduce sign extends sometimes which might be harder to deal with than the zero extends we use for promoting SRL. I ran this through some of our internal benchmark lists and didn't see any major regressions. I think there might be some DAG combine improvement opportunities in the test changes here. Differential Revision: https://reviews.llvm.org/D60278 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357743 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 71ea61b677b..061a7d9b8f3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -42796,6 +42796,7 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::SHL: + case ISD::SRA: case ISD::SRL: case ISD::SUB: case ISD::ADD: @@ -42871,6 +42872,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { case ISD::ANY_EXTEND: break; case ISD::SHL: + case ISD::SRA: case ISD::SRL: { SDValue N0 = Op.getOperand(0); // Look out for (store (shl (load), x)). 
diff --git a/test/CodeGen/X86/dagcombine-shifts.ll b/test/CodeGen/X86/dagcombine-shifts.ll index f61e9bccddc..d650bd18eaf 100644 --- a/test/CodeGen/X86/dagcombine-shifts.ll +++ b/test/CodeGen/X86/dagcombine-shifts.ll @@ -108,8 +108,9 @@ entry: define i64 @fun8(i16 zeroext %v) { ; CHECK-LABEL: fun8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sarw $4, %di -; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: movswl %di, %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: movzwl %ax, %eax ; CHECK-NEXT: shlq $4, %rax ; CHECK-NEXT: retq entry: diff --git a/test/CodeGen/X86/iabs.ll b/test/CodeGen/X86/iabs.ll index 99495700f5a..338e66622dc 100644 --- a/test/CodeGen/X86/iabs.ll +++ b/test/CodeGen/X86/iabs.ll @@ -37,9 +37,9 @@ define i8 @test_i8(i8 %a) nounwind { define i16 @test_i16(i16 %a) nounwind { ; X86-NO-CMOV-LABEL: test_i16: ; X86-NO-CMOV: # %bb.0: -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax ; X86-NO-CMOV-NEXT: movl %eax, %ecx -; X86-NO-CMOV-NEXT: sarw $15, %cx +; X86-NO-CMOV-NEXT: sarl $15, %ecx ; X86-NO-CMOV-NEXT: addl %ecx, %eax ; X86-NO-CMOV-NEXT: xorl %ecx, %eax ; X86-NO-CMOV-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/test/CodeGen/X86/load-scalar-as-vector.ll b/test/CodeGen/X86/load-scalar-as-vector.ll index 7bc9ee0fd6b..b5ac5c7dfb5 100644 --- a/test/CodeGen/X86/load-scalar-as-vector.ll +++ b/test/CodeGen/X86/load-scalar-as-vector.ll @@ -297,16 +297,16 @@ define <8 x i16> @ashr_op0_constant(i16* %p) nounwind { ; SSE-LABEL: ashr_op0_constant: ; SSE: # %bb.0: ; SSE-NEXT: movb (%rdi), %cl -; SSE-NEXT: movw $-42, %ax -; SSE-NEXT: sarw %cl, %ax +; SSE-NEXT: movl $-42, %eax +; SSE-NEXT: sarl %cl, %eax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: ashr_op0_constant: ; AVX: # %bb.0: ; AVX-NEXT: movb (%rdi), %cl -; AVX-NEXT: movw $-42, %ax -; AVX-NEXT: sarw %cl, %ax +; AVX-NEXT: movl $-42, %eax +; AVX-NEXT: sarl %cl, %eax ; AVX-NEXT: vmovd %eax, %xmm0 ; AVX-NEXT: retq %x = load i16, i16* %p @@ 
-318,15 +318,15 @@ define <8 x i16> @ashr_op0_constant(i16* %p) nounwind { define <8 x i16> @ashr_op1_constant(i16* %p) nounwind { ; SSE-LABEL: ashr_op1_constant: ; SSE: # %bb.0: -; SSE-NEXT: movzwl (%rdi), %eax -; SSE-NEXT: sarw $7, %ax +; SSE-NEXT: movswl (%rdi), %eax +; SSE-NEXT: sarl $7, %eax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: ashr_op1_constant: ; AVX: # %bb.0: -; AVX-NEXT: movzwl (%rdi), %eax -; AVX-NEXT: sarw $7, %ax +; AVX-NEXT: movswl (%rdi), %eax +; AVX-NEXT: sarl $7, %eax ; AVX-NEXT: vmovd %eax, %xmm0 ; AVX-NEXT: retq %x = load i16, i16* %p @@ -365,10 +365,11 @@ define <8 x i16> @sdiv_op1_constant(i16* %p) nounwind { ; SSE-NEXT: shrl $16, %ecx ; SSE-NEXT: addl %eax, %ecx ; SSE-NEXT: movzwl %cx, %eax -; SSE-NEXT: sarw $5, %cx +; SSE-NEXT: movswl %ax, %ecx ; SSE-NEXT: shrl $15, %eax -; SSE-NEXT: addl %ecx, %eax -; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: sarl $5, %ecx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: movd %ecx, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sdiv_op1_constant: @@ -378,10 +379,11 @@ define <8 x i16> @sdiv_op1_constant(i16* %p) nounwind { ; AVX-NEXT: shrl $16, %ecx ; AVX-NEXT: addl %eax, %ecx ; AVX-NEXT: movzwl %cx, %eax -; AVX-NEXT: sarw $5, %cx +; AVX-NEXT: movswl %ax, %ecx ; AVX-NEXT: shrl $15, %eax -; AVX-NEXT: addl %ecx, %eax -; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: sarl $5, %ecx +; AVX-NEXT: addl %eax, %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 ; AVX-NEXT: retq %x = load i16, i16* %p %b = sdiv i16 %x, 42 diff --git a/test/CodeGen/X86/pr32420.ll b/test/CodeGen/X86/pr32420.ll index beaec998a72..2775760e1b0 100644 --- a/test/CodeGen/X86/pr32420.ll +++ b/test/CodeGen/X86/pr32420.ll @@ -14,12 +14,14 @@ define i32 @PR32420() { ; CHECK-NEXT: movzwl (%rcx), %eax ; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: shll $12, %edx -; CHECK-NEXT: sarw $12, %dx +; CHECK-NEXT: movswl %dx, %edx +; CHECK-NEXT: shrl $12, %edx ; CHECK-NEXT: movq _b@{{.*}}(%rip), %rsi ; CHECK-NEXT: orw (%rsi), %dx ; CHECK-NEXT: movl (%rcx), %ecx ; CHECK-NEXT: 
shll $12, %ecx -; CHECK-NEXT: sarw $12, %cx +; CHECK-NEXT: movswl %cx, %ecx +; CHECK-NEXT: shrl $12, %ecx ; CHECK-NEXT: andl %edx, %ecx ; CHECK-NEXT: movw %cx, (%rsi) ; CHECK-NEXT: retq diff --git a/test/CodeGen/X86/speculative-load-hardening.ll b/test/CodeGen/X86/speculative-load-hardening.ll index 32ad43634fc..54cde2c124e 100644 --- a/test/CodeGen/X86/speculative-load-hardening.ll +++ b/test/CodeGen/X86/speculative-load-hardening.ll @@ -1045,10 +1045,10 @@ define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind sp ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpq $.Lslh_ret_addr23, %rcx ; X64-NEXT: cmovneq %r15, %rax -; X64-NEXT: movzwl (%rbx), %ecx -; X64-NEXT: sarw $7, %cx -; X64-NEXT: movzwl %cx, %edi +; X64-NEXT: movswl (%rbx), %edi +; X64-NEXT: shrl $7, %edi ; X64-NEXT: notl %edi +; X64-NEXT: orl $-65536, %edi # imm = 0xFFFF0000 ; X64-NEXT: orl %eax, %edi ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp @@ -1098,10 +1098,10 @@ define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind sp ; X64-LFENCE-NEXT: movl (%rbx), %edi ; X64-LFENCE-NEXT: shll $7, %edi ; X64-LFENCE-NEXT: callq sink -; X64-LFENCE-NEXT: movzwl (%rbx), %eax -; X64-LFENCE-NEXT: sarw $7, %ax -; X64-LFENCE-NEXT: movzwl %ax, %edi +; X64-LFENCE-NEXT: movswl (%rbx), %edi +; X64-LFENCE-NEXT: shrl $7, %edi ; X64-LFENCE-NEXT: notl %edi +; X64-LFENCE-NEXT: orl $-65536, %edi # imm = 0xFFFF0000 ; X64-LFENCE-NEXT: callq sink ; X64-LFENCE-NEXT: movzwl (%rbx), %eax ; X64-LFENCE-NEXT: rolw $9, %ax