From: Craig Topper Date: Sat, 6 Jul 2019 17:59:45 +0000 (+0000) Subject: [X86] Add patterns to select MOVLPDrm from MOVSD+load and MOVHPD from UNPCKL+load. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f6c16ed9a4b2f516501b40951496215488af71cb;p=llvm [X86] Add patterns to select MOVLPDrm from MOVSD+load and MOVHPD from UNPCKL+load. These narrow the load so we can only do it if the load isn't volatile. There are also tests in vector-shuffle-128-v4.ll that this should support, but we don't seem to fold bitcast+load on pre-sse4.2 targets due to the slow unaligned mem 16 flag. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365266 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index dc2db1e8e61..76530adc152 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -748,6 +748,13 @@ let Predicates = [UseSSE2] in { (MOVLPDrm VR128:$src1, addr:$src2)>; } +let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in { + // Use MOVLPD to load into the low bits from a full vector unless we can use + // BLENDPD. + def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions //===----------------------------------------------------------------------===// @@ -2075,6 +2082,13 @@ let Predicates = [HasAVX1Only] in { (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; } +let Predicates = [UseSSE2] in { + // Use MOVHPD if the load isn't aligned enough for UNPCKLPD. 
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, + (v2f64 (nonvolatile_load addr:$src2)))), + (MOVHPDrm VR128:$src1, addr:$src2)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Extract Floating-Point Sign mask //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll index 761855e5c62..2b42d33000a 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -1309,8 +1309,7 @@ define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) { define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) { ; SSE-LABEL: shuffle_mem_v2f64_02: ; SSE: # %bb.0: -; SSE-NEXT: movups (%rdi), %xmm1 -; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_mem_v2f64_02: @@ -1325,20 +1324,17 @@ define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) { define <2 x double> @shuffle_mem_v2f64_21(<2 x double> %a, <2 x double>* %pb) { ; SSE2-LABEL: shuffle_mem_v2f64_21: ; SSE2: # %bb.0: -; SSE2-NEXT: movupd (%rdi), %xmm1 -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_mem_v2f64_21: ; SSE3: # %bb.0: -; SSE3-NEXT: movupd (%rdi), %xmm1 -; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_mem_v2f64_21: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movupd (%rdi), %xmm1 -; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_mem_v2f64_21: