From: Chandler Carruth
Date: Mon, 21 Aug 2017 08:45:22 +0000 (+0000)
Subject: [x86] Teach the "generic" x86 CPU to avoid patterns that are slow on
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0b54cd97e1726978f8919c4af41a5b16fa8385c7;p=llvm

[x86] Teach the "generic" x86 CPU to avoid patterns that are slow on
widely used processors.

This occurred to me when I saw that we were generating 'inc' and 'dec'
even though, for Haswell and newer, we shouldn't. However, there were a
few other "X is slow" features that we should probably just set on the
generic model as well.

I've avoided any of the "X is fast" features because most of those would
be pretty serious regressions on processors where X isn't actually fast.
The "X is slow" features are likely to be a negligible cost on processors
where these patterns aren't slow, and a significant win on processors
where they are.

In retrospect this seems somewhat obvious. Not sure why we didn't do this
a long time ago.

Differential Revision: https://reviews.llvm.org/D36947

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311318 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 54eabeac512..8fc2a63c8fc 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -866,9 +866,17 @@ def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 // covers a huge swath of x86 processors. If there are specific scheduling
 // knobs which need to be tuned differently for AMD chips, we might consider
 // forming a common base for them.
-def : ProcessorModel<"x86-64", SandyBridgeModel,
-                     [FeatureX87, FeatureMMX, FeatureSSE2, FeatureFXSR,
-                      Feature64Bit, FeatureSlowBTMem ]>;
+def : ProcessorModel<"x86-64", SandyBridgeModel, [
+  FeatureX87,
+  FeatureMMX,
+  FeatureSSE2,
+  FeatureFXSR,
+  Feature64Bit,
+  FeatureSlow3OpsLEA,
+  FeatureSlowBTMem,
+  FeatureSlowIncDec,
+  FeatureSlowUAMem32
+]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll
index 23b30b5d316..f6f326c88fc 100644
--- a/test/CodeGen/X86/avx-schedule.ll
+++ b/test/CodeGen/X86/avx-schedule.ll
@@ -752,7 +752,9 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
 ; GENERIC-LABEL: test_cvtdq2ps:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
+; GENERIC-NEXT:    vmovaps (%rdi), %xmm1 # sched: [6:0.50]
+; GENERIC-NEXT:    vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50]
+; GENERIC-NEXT:    vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1954,9 +1956,11 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) {
 define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
 ; GENERIC-LABEL: test_movupd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vmovupd (%rdi), %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vmovupd %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    vmovupd %xmm0, (%rsi) # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_movupd:
@@ -1997,9 +2001,11 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
 define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
 ; GENERIC-LABEL: test_movups:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vmovups (%rdi), %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vmovups %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    vmovups %xmm0, (%rsi) # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_movups:
diff --git a/test/CodeGen/X86/lea32-schedule.ll b/test/CodeGen/X86/lea32-schedule.ll
index 8aedb8daf0d..eb4e3009491 100644
--- a/test/CodeGen/X86/lea32-schedule.ll
+++ b/test/CodeGen/X86/lea32-schedule.ll
@@ -179,7 +179,8 @@ define i32 @test_lea_add_offset(i32, i32) {
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
 ; GENERIC-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT:    leal 16(%rdi,%rsi), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    leal (%rdi,%rsi), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    addl $16, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_add_offset:
@@ -241,7 +242,9 @@ define i32 @test_lea_add_offset_big(i32, i32) {
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
 ; GENERIC-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT:    leal -4096(%rdi,%rsi), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    leal (%rdi,%rsi), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    addl $-4096, %eax # imm = 0xF000
+; GENERIC-NEXT:    # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_add_offset_big:
@@ -356,7 +359,8 @@ define i32 @test_lea_mul_offset(i32) {
 ; GENERIC-LABEL: test_lea_mul_offset:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT:    leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    addl $-32, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_mul_offset:
@@ -411,7 +415,9 @@ define i32 @test_lea_mul_offset_big(i32) {
 ; GENERIC-LABEL: test_lea_mul_offset_big:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT:    leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    addl $10000, %eax # imm = 0x2710
+; GENERIC-NEXT:    # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_mul_offset_big:
@@ -529,7 +535,8 @@ define i32 @test_lea_add_scale_offset(i32, i32) {
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
 ; GENERIC-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT:    leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    addl $96, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_add_scale_offset:
@@ -592,7 +599,9 @@ define i32 @test_lea_add_scale_offset_big(i32, i32) {
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
 ; GENERIC-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT:    leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    addl $-1200, %eax # imm = 0xFB50
+; GENERIC-NEXT:    # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_add_scale_offset_big:
diff --git a/test/CodeGen/X86/lea64-schedule.ll b/test/CodeGen/X86/lea64-schedule.ll
index c81f112e9ec..dd629c33ed0 100644
--- a/test/CodeGen/X86/lea64-schedule.ll
+++ b/test/CodeGen/X86/lea64-schedule.ll
@@ -149,7 +149,8 @@ define i64 @test_lea_add(i64, i64) {
 define i64 @test_lea_add_offset(i64, i64) {
 ; GENERIC-LABEL: test_lea_add_offset:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    leaq 16(%rdi,%rsi), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    leaq (%rdi,%rsi), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    addq $16, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_add_offset:
@@ -197,7 +198,9 @@ define i64 @test_lea_add_offset(i64, i64) {
 define i64 @test_lea_add_offset_big(i64, i64) {
 ; GENERIC-LABEL: test_lea_add_offset_big:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    leaq (%rdi,%rsi), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    addq $-4096, %rax # imm = 0xF000
+; GENERIC-NEXT:    # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_add_offset_big:
@@ -292,7 +295,8 @@ define i64 @test_lea_mul(i64) {
 define i64 @test_lea_mul_offset(i64) {
 ; GENERIC-LABEL: test_lea_mul_offset:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    addq $-32, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_mul_offset:
@@ -340,7 +344,9 @@ define i64 @test_lea_mul_offset(i64) {
 define i64 @test_lea_mul_offset_big(i64) {
 ; GENERIC-LABEL: test_lea_mul_offset_big:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    addq $10000, %rax # imm = 0x2710
+; GENERIC-NEXT:    # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_mul_offset_big:
@@ -436,7 +442,8 @@ define i64 @test_lea_add_scale(i64, i64) {
 define i64 @test_lea_add_scale_offset(i64, i64) {
 ; GENERIC-LABEL: test_lea_add_scale_offset:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    addq $96, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_add_scale_offset:
@@ -485,7 +492,9 @@ define i64 @test_lea_add_scale_offset(i64, i64) {
 define i64 @test_lea_add_scale_offset_big(i64, i64) {
 ; GENERIC-LABEL: test_lea_add_scale_offset_big:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
+; GENERIC-NEXT:    addq $-1200, %rax # imm = 0xFB50
+; GENERIC-NEXT:    # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lea_add_scale_offset_big:
diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 665c5110e7c..bc89d90d5d8 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -299,7 +299,8 @@ define <4 x i32> @test_v16i32_0_1_2_12 (<16 x i32> %v) {
 define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
 ; ALL-LABEL: shuffle_v16f32_extract_256:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovups 32(%rsi), %ymm0
+; ALL-NEXT:    vmovups 32(%rsi), %xmm0
+; ALL-NEXT:    vinsertf128 $1, 48(%rsi), %ymm0, %ymm0
 ; ALL-NEXT:    retq
   %ptr_a = bitcast float* %a to <16 x float>*
   %v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
diff --git a/test/DebugInfo/COFF/register-variables.ll b/test/DebugInfo/COFF/register-variables.ll
index d3a0633a0e5..02ddd42f616 100644
--- a/test/DebugInfo/COFF/register-variables.ll
+++ b/test/DebugInfo/COFF/register-variables.ll
@@ -42,7 +42,7 @@
 ; ASM-DAG: #DEBUG_VALUE: inlineinc:a <- %EAX
 ; ASM-DAG: #DEBUG_VALUE: a <- %EAX
 ; ASM-DAG: #DEBUG_VALUE: f:p <- %ESI
-; ASM: incl %eax
+; ASM: addl $1, %eax
 ; ASM: [[after_inc_eax:\.Ltmp.*]]:
 ; ASM: #DEBUG_VALUE: inlineinc:b <- %EAX
 ; ASM: #DEBUG_VALUE: b <- %EAX
@@ -104,7 +104,7 @@
 ; OBJ: LocalVariableAddrRange {
 ; OBJ: OffsetStart: .text+0x7
 ; OBJ: ISectStart: 0x0
-; OBJ: Range: 0x18
+; OBJ: Range: 0x19
 ; OBJ: }
 ; OBJ: }
 ; OBJ: LocalSym {
@@ -118,7 +118,7 @@
 ; OBJ: LocalVariableAddrRange {
 ; OBJ: OffsetStart: .text+0xC
 ; OBJ: ISectStart: 0x0
-; OBJ: Range: 0x6
+; OBJ: Range: 0x7
 ; OBJ: }
 ; OBJ: }
 ; OBJ: LocalSym {
@@ -144,7 +144,7 @@
 ; OBJ: DefRangeRegisterSym {
 ; OBJ: Register: 17
 ; OBJ: MayHaveNoName: 0
-; OBJ: OffsetStart: .text+0x12
+; OBJ: OffsetStart: .text+0x13
 ; OBJ: ISectStart: 0x0
 ; OBJ: Range: 0x6
 ; OBJ: }
@@ -166,7 +166,7 @@
 ; OBJ: LocalVariableAddrRange {
 ; OBJ: OffsetStart: .text+0xC
 ; OBJ: ISectStart: 0x0
-; OBJ: Range: 0x6
+; OBJ: Range: 0x7
 ; OBJ: }
 ; OBJ: }
 ; OBJ: LocalSym {
@@ -178,7 +178,7 @@
 ; OBJ: DefRangeRegisterSym {
 ; OBJ: Register: 17
 ; OBJ: LocalVariableAddrRange {
-; OBJ: OffsetStart: .text+0x12
+; OBJ: OffsetStart: .text+0x13
 ; OBJ: ISectStart: 0x0
 ; OBJ: Range: 0x6
 ; OBJ: }
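
For reference, here is a minimal sketch of how the newly set features change codegen for the generic CPU. This is a hypothetical example (the file name slow-generic.ll and function names are illustrative, not a test from this commit, and exact instruction sequences may vary by LLVM version), compiled with:
llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx slow-generic.ll -o -

; slow-generic.ll -- hypothetical demo, not part of this commit.

; FeatureSlowIncDec: the read-modify-write increment should now be emitted
; as 'addl $1, (%rdi)' rather than 'incl (%rdi)', mirroring the
; register-variables.ll change above.
define void @inc_mem(i32* %p) {
  %v = load i32, i32* %p
  %r = add i32 %v, 1
  store i32 %r, i32* %p
  ret void
}

; FeatureSlow3OpsLEA: a base + index + displacement address should now be
; split into a two-operand lea plus an add, mirroring test_lea_add_offset
; in the lea64-schedule.ll changes above.
define i64 @lea3(i64 %a, i64 %b) {
  %s = add i64 %a, %b
  %r = add i64 %s, 16
  ret i64 %r
}

; FeatureSlowUAMem32 (visible once AVX is enabled on top of the generic
; model): the unaligned 32-byte load should be split into a 16-byte vmovups
; plus a vinsertf128, as in the avx-schedule.ll changes above.
define <8 x float> @load_unaligned(<8 x float>* %p) {
  %v = load <8 x float>, <8 x float>* %p, align 1
  ret <8 x float> %v
}

Only the last function depends on -mattr=+avx; the inc/dec and LEA splitting already apply to the baseline x86-64 feature set.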