From 374362d92036c3f80e17bcc5051b5451498b350a Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 31 Jan 2017 05:18:29 +0000
Subject: [PATCH] [X86] Update the broadcast fallback patterns to use shuffle
 instructions from the appropriate execution domain.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293603 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrSSE.td      | 12 +++++------
 test/CodeGen/X86/avx-vbroadcast.ll | 34 +++++++++++++++---------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2e0cd64bdc0..bc32eee1516 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -8314,15 +8314,15 @@ let Predicates = [HasAVX, NoVLX] in {
 
 let Predicates = [HasAVX1Only] in {
   def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
-            (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
+            (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
   def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
             (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
-              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
-              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
+              (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+              (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
   def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
             (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
-              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
-              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
+              (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_xmm),
+              (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), 1)>;
 
   def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
             (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
@@ -8336,7 +8336,7 @@ let Predicates = [HasAVX1Only] in {
               (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
 
   def : Pat<(v2i64 (X86VBroadcast i64:$src)),
-            (VMOVDDUPrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+            (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 312f4237cd2..77b9c0dc8f1 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -49,7 +49,7 @@ define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    vmovq %rax, %xmm0
 ; X64-NEXT:    movq %rax, (%rsi)
-; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT:    retq
 entry:
@@ -114,7 +114,7 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
 ; X32-NEXT:    movl (%ecx), %ecx
 ; X32-NEXT:    vmovd %ecx, %xmm0
 ; X32-NEXT:    movl %ecx, (%eax)
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-NEXT:    retl
 ;
@@ -123,7 +123,7 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    vmovd %eax, %xmm0
 ; X64-NEXT:    movl %eax, (%rsi)
-; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT:    retq
 entry:
@@ -167,7 +167,7 @@ define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone s
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X32-NEXT:    vmovsd %xmm0, (%eax)
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-NEXT:    retl
 ;
@@ -175,7 +175,7 @@ define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone s
 ; X64:       ## BB#0: ## %entry
 ; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X64-NEXT:    vmovsd %xmm0, (%rsi)
-; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT:    retq
 entry:
@@ -237,17 +237,17 @@ define <8 x float> @D3(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp
 ; X32:       ## BB#0: ## %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    vmovd %xmm0, (%eax)
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    vmovss %xmm0, (%eax)
+; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: D3:
 ; X64:       ## BB#0: ## %entry
-; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    vmovd %xmm0, (%rsi)
-; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    vmovss %xmm0, (%rsi)
+; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT:    retq
 entry:
@@ -291,16 +291,16 @@ define <4 x float> @e2(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp
 ; X32:       ## BB#0: ## %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    vmovd %xmm0, (%eax)
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    vmovss %xmm0, (%eax)
+; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: e2:
 ; X64:       ## BB#0: ## %entry
-; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    vmovd %xmm0, (%rsi)
-; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    vmovss %xmm0, (%rsi)
+; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-NEXT:    retq
 entry:
   %q = load float, float* %ptr, align 4
-- 
2.50.1
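
For context (not part of the patch): a minimal LLVM IR sketch of the register-broadcast case these fallback patterns cover. The function name and the llc invocation below are illustrative assumptions. On an AVX1-only target, a scalar f32 splat like this should now select the VPERMILPSri-based pattern above (vpermilps + vinsertf128) rather than vpshufd, keeping the shuffle in the floating-point execution domain and avoiding a potential domain-crossing penalty on FP data.

; Hypothetical repro, assuming an AVX1-only target:
;   llc -mtriple=x86_64-unknown-unknown -mattr=+avx
define <8 x float> @splat_f32(float %x) {
  ; Build a splat of the scalar argument. On AVX1 this becomes an
  ; X86VBroadcast from FR32, since vbroadcastss from a register
  ; (rather than from memory) requires AVX2.
  %ins = insertelement <8 x float> undef, float %x, i32 0
  %splat = shufflevector <8 x float> %ins, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %splat
}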