From 7ef283f68dcbaab64cf310dfe8014145a189687d Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 7 Apr 2019 19:19:44 +0000
Subject: [PATCH] [X86] Use (SUBREG_TO_REG (MOV32rm)) for
 extloadi64i8/extloadi64i16 when the load is 4 byte aligned or better and
 not volatile.

Summary:
Previously we would use MOVZX32rm8/MOVZX32rm16, but those are longer
encodings.

This is similar to what we do in the loadi32 predicate.

Reviewers: RKSimon, spatel

Reviewed By: RKSimon

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60341

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357875 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrCompiler.td          |  6 ++++--
 lib/Target/X86/X86InstrInfo.td              | 14 +++++++++++++-
 test/CodeGen/X86/fp128-cast.ll              |  2 +-
 test/CodeGen/X86/vector-sext-widen.ll       | 14 +++++++-------
 test/CodeGen/X86/vector-sext.ll             | 14 +++++++-------
 test/CodeGen/X86/zext-logicop-shift-load.ll |  6 +++---
 6 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 1fcc33e8724..dbc2fbcc6e7 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1279,14 +1279,16 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
 
 // For other extloads, use subregs, since the high contents of the register are
 // defined after an extload.
+// NOTE: The extloadi64i32 pattern needs to be first as it will try to form
+// 32-bit loads for 4-byte-aligned i8/i16 loads.
+def : Pat<(extloadi64i32 addr:$src),
+          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
 def : Pat<(extloadi64i1 addr:$src),
           (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
 def : Pat<(extloadi64i8 addr:$src),
           (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
 def : Pat<(extloadi64i16 addr:$src),
           (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
-def : Pat<(extloadi64i32 addr:$src),
-          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
 
 // anyext. Define these to do an explicit zero-extend to
 // avoid partial-register updates.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index dc5e04cdd67..0176c2d707a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1121,7 +1121,19 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
 def extloadi64i1  : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
 def extloadi64i8  : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
 def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+// We can treat an i8/i16 extending load to i64 as a 32-bit load if it's known
+// to be 4-byte aligned or better.
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [{
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  if (ExtType != ISD::EXTLOAD)
+    return false;
+  if (LD->getMemoryVT() == MVT::i32)
+    return true;
+
+  return LD->getAlignment() >= 4 && !LD->isVolatile();
+}]>;
 
 // An 'and' node with a single use.
 def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
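
Illustrative sketch, not part of the commit: the function name @ext8_aligned
and the expected asm below are assumptions inferred from the new predicate
and from the updated CHECK lines in zext-logicop-shift-load.ll further down.
A 4-byte-aligned i8 load whose high bits are masked away can now select a
plain 32-bit load:

define i64 @ext8_aligned(i8* %p) {
entry:
  %v = load i8, i8* %p, align 4
  %m = and i8 %v, 15
  %e = zext i8 %m to i64
  ret i64 %e
}

; Expected with this patch:  movl (%rdi), %eax / andl $15, %eax / retq
; Expected before this patch: movzbl (%rdi), %eax / andl $15, %eax / retq

The 4-byte alignment matters: it guarantees the wider load stays inside the
same aligned 4-byte chunk, and therefore the same page, as the original byte.
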
diff --git a/test/CodeGen/X86/fp128-cast.ll b/test/CodeGen/X86/fp128-cast.ll
index 71b9c3f7f7b..d299d5337c0 100644
--- a/test/CodeGen/X86/fp128-cast.ll
+++ b/test/CodeGen/X86/fp128-cast.ll
@@ -413,7 +413,7 @@ define void @TestFPTruncF128_F80() nounwind {
 ; X64-NEXT:    fstpt (%rsp)
 ; X64-NEXT:    movq (%rsp), %rax
 ; X64-NEXT:    movq %rax, {{.*}}(%rip)
-; X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movw %ax, vf80+{{.*}}(%rip)
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
diff --git a/test/CodeGen/X86/vector-sext-widen.ll b/test/CodeGen/X86/vector-sext-widen.ll
index 327e6fbbdd8..e58b53fc8cf 100644
--- a/test/CodeGen/X86/vector-sext-widen.ll
+++ b/test/CodeGen/X86/vector-sext-widen.ll
@@ -1494,7 +1494,7 @@ entry:
 define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; SSE2-LABEL: load_sext_4i1_to_4i32:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movzbl (%rdi), %eax
+; SSE2-NEXT:    movl (%rdi), %eax
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shlq $60, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
@@ -1517,7 +1517,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ;
 ; SSSE3-LABEL: load_sext_4i1_to_4i32:
 ; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movzbl (%rdi), %eax
+; SSSE3-NEXT:    movl (%rdi), %eax
 ; SSSE3-NEXT:    movq %rax, %rcx
 ; SSSE3-NEXT:    shlq $60, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
@@ -1540,7 +1540,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ;
 ; SSE41-LABEL: load_sext_4i1_to_4i32:
 ; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movzbl (%rdi), %eax
+; SSE41-NEXT:    movl (%rdi), %eax
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shlq $62, %rcx
 ; SSE41-NEXT:    sarq $63, %rcx
@@ -1560,7 +1560,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ;
 ; AVX1-LABEL: load_sext_4i1_to_4i32:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movzbl (%rdi), %eax
+; AVX1-NEXT:    movl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
 ; AVX1-NEXT:    sarq $63, %rcx
@@ -1580,7 +1580,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ;
 ; AVX2-LABEL: load_sext_4i1_to_4i32:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movzbl (%rdi), %eax
+; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
 ; AVX2-NEXT:    sarq $63, %rcx
@@ -1781,7 +1781,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ;
 ; AVX1-LABEL: load_sext_4i1_to_4i64:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movzbl (%rdi), %eax
+; AVX1-NEXT:    movl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
 ; AVX1-NEXT:    sarq $63, %rcx
@@ -1805,7 +1805,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ;
 ; AVX2-LABEL: load_sext_4i1_to_4i64:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movzbl (%rdi), %eax
+; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $60, %rcx
 ; AVX2-NEXT:    sarq $63, %rcx
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 9dae10e3f2e..bea09bbad91 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -1494,7 +1494,7 @@ entry:
 define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; SSE2-LABEL: load_sext_4i1_to_4i32:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movzbl (%rdi), %eax
+; SSE2-NEXT:    movl (%rdi), %eax
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shlq $60, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
@@ -1517,7 +1517,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ;
 ; SSSE3-LABEL: load_sext_4i1_to_4i32:
 ; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movzbl (%rdi), %eax
+; SSSE3-NEXT:    movl (%rdi), %eax
 ; SSSE3-NEXT:    movq %rax, %rcx
 ; SSSE3-NEXT:    shlq $60, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
@@ -1540,7 +1540,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ;
 ; SSE41-LABEL: load_sext_4i1_to_4i32:
 ; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movzbl (%rdi), %eax
+; SSE41-NEXT:    movl (%rdi), %eax
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shlq $62, %rcx
 ; SSE41-NEXT:    sarq $63, %rcx
@@ -1560,7 +1560,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ;
 ; AVX1-LABEL: load_sext_4i1_to_4i32:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movzbl (%rdi), %eax
+; AVX1-NEXT:    movl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
 ; AVX1-NEXT:    sarq $63, %rcx
@@ -1580,7 +1580,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ;
 ; AVX2-LABEL: load_sext_4i1_to_4i32:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movzbl (%rdi), %eax
+; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
 ; AVX2-NEXT:    sarq $63, %rcx
@@ -1781,7 +1781,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ;
 ; AVX1-LABEL: load_sext_4i1_to_4i64:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movzbl (%rdi), %eax
+; AVX1-NEXT:    movl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
 ; AVX1-NEXT:    sarq $63, %rcx
@@ -1805,7 +1805,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ;
 ; AVX2-LABEL: load_sext_4i1_to_4i64:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movzbl (%rdi), %eax
+; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $60, %rcx
 ; AVX2-NEXT:    sarq $63, %rcx
diff --git a/test/CodeGen/X86/zext-logicop-shift-load.ll b/test/CodeGen/X86/zext-logicop-shift-load.ll
index 40d72243dea..b37eaa44cc9 100644
--- a/test/CodeGen/X86/zext-logicop-shift-load.ll
+++ b/test/CodeGen/X86/zext-logicop-shift-load.ll
@@ -5,7 +5,7 @@
 define i64 @test1(i8* %data) {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    movl (%rdi), %eax
 ; CHECK-NEXT:    shlq $2, %rax
 ; CHECK-NEXT:    andl $60, %eax
 ; CHECK-NEXT:    retq
@@ -20,7 +20,7 @@ entry:
 define i8* @test2(i8* %data) {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    movl (%rdi), %eax
 ; CHECK-NEXT:    andl $15, %eax
 ; CHECK-NEXT:    leaq (%rdi,%rax,4), %rax
 ; CHECK-NEXT:    retq
@@ -53,7 +53,7 @@ entry:
 define i64 @test4(i8* %data) {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    movl (%rdi), %eax
 ; CHECK-NEXT:    shrq $2, %rax
 ; CHECK-NEXT:    andl $60, %eax
 ; CHECK-NEXT:    retq
-- 
2.50.1
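
Illustrative counterexample (again an assumption, not a test from this
commit; the function name and expected asm are inferred): the predicate's
`LD->getAlignment() >= 4 && !LD->isVolatile()` guard means an under-aligned
or volatile load is unaffected and should still select the movzx form.

define i64 @ext8_unaligned(i8* %p) {
entry:
  %v = load i8, i8* %p, align 1
  %m = and i8 %v, 15
  %e = zext i8 %m to i64
  ret i64 %e
}

; Expected: movzbl (%rdi), %eax / andl $15, %eax / retq. The wider load is
; unsafe here: reading 4 bytes through a pointer only known to be 1-byte
; aligned could cross into an unmapped page, and widening a volatile access
; would change the access size the program asked for.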