From 16942ada102a4ba2d045d238722624251cf4bfb8 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 8 May 2016 07:10:47 +0000
Subject: [PATCH] [X86] No need to avoid selecting AVX_SET0 for 256-bit
 integer types when only AVX1 is supported. AVX_SET0 just expands to 256-bit
 VXORPS which is legal in AVX1.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268871 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrSSE.td             | 24 +------------------
 test/CodeGen/X86/2012-01-12-extract-sv.ll |  3 +--
 test/CodeGen/X86/2012-1-10-buildvector.ll |  2 +-
 test/CodeGen/X86/avx-select.ll            |  4 ++--
 .../X86/merge-consecutive-loads-256.ll    |  4 ++--
 test/CodeGen/X86/vector-shuffle-256-v8.ll |  8 +++----
 6 files changed, 11 insertions(+), 34 deletions(-)

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 21852a2d90d..1490fd03fe5 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -494,36 +494,14 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                  [(set VR256:$dst, (v8f32 immAllZerosV))]>;
 }
 
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX] in {
 def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
-
-let Predicates = [HasAVX2] in {
 def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
 def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
 def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
 def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
 }
 
-// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
-// VPXOR instruction writes zero to its upper part, it's safe build zeros.
-let Predicates = [HasAVX1Only] in {
-def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
-def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
-          (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
-
-def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
-def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
-          (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
-
-def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
-def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
-          (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
-
-def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
-def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
-          (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
-}
-
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-ones value if folding it would be beneficial.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
index fec1caa2e86..68450add580 100644
--- a/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -8,9 +8,8 @@ define void @endless_loop() {
 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
 ; CHECK-NEXT:    vxorps %ymm2, %ymm2, %ymm2
+; CHECK-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
 ; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
 ; CHECK-NEXT:    vmovaps %ymm0, (%eax)
 ; CHECK-NEXT:    vmovaps %ymm1, (%eax)
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
index eb237847e1b..2d1b5960d98 100644
--- a/test/CodeGen/X86/2012-1-10-buildvector.ll
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -4,7 +4,7 @@
 define void @bad_cast() {
 ; CHECK-LABEL: bad_cast:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
 ; CHECK-NEXT:    vmovaps %xmm0, (%eax)
 ; CHECK-NEXT:    movl $0, (%eax)
 ; CHECK-NEXT:    vzeroupper
diff --git a/test/CodeGen/X86/avx-select.ll b/test/CodeGen/X86/avx-select.ll
index 514feff2112..cdd3180d624 100644
--- a/test/CodeGen/X86/avx-select.ll
+++ b/test/CodeGen/X86/avx-select.ll
@@ -4,7 +4,7 @@
 define <8 x i32> @select00(i32 %a, <8 x i32> %b) nounwind {
 ; CHECK-LABEL: select00:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1
 ; CHECK-NEXT:    cmpl $255, %edi
 ; CHECK-NEXT:    je LBB0_2
 ; CHECK-NEXT:    ## BB#1:
@@ -21,7 +21,7 @@ define <8 x i32> @select00(i32 %a, <8 x i32> %b) nounwind {
 define <4 x i64> @select01(i32 %a, <4 x i64> %b) nounwind {
 ; CHECK-LABEL: select01:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1
 ; CHECK-NEXT:    cmpl $255, %edi
 ; CHECK-NEXT:    je LBB1_2
 ; CHECK-NEXT:    ## BB#1:
diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll
index f77df77b8e0..8c2e9372900 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-256.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll
@@ -432,7 +432,7 @@ define <8 x i32> @merge_8i32_i32_56zz9uzz(i32* %ptr) nounwind uwtable noinline s
 define <8 x i32> @merge_8i32_i32_1u3u5zu8(i32* %ptr) nounwind uwtable noinline ssp {
 ; AVX1-LABEL: merge_8i32_i32_1u3u5zu8:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vxorps %ymm0, %ymm0, %ymm0
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
 ; AVX1-NEXT:    retq
 ;
@@ -451,7 +451,7 @@ define <8 x i32> @merge_8i32_i32_1u3u5zu8(i32* %ptr) nounwind uwtable noinline s
 ; X32-AVX-LABEL: merge_8i32_i32_1u3u5zu8:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
 ; X32-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
 ; X32-AVX-NEXT:    retl
   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll
index e89323dfd25..ceda13c22ee 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -1907,7 +1907,7 @@ define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
 ; AVX1-NEXT:    retq
 ;
@@ -1922,7 +1922,7 @@ define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
 ; AVX1-NEXT:    retq
@@ -2048,7 +2048,7 @@ define <8 x float> @splat_v8f32(<4 x float> %r) {
 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
 ; AVX1-NEXT:    retq
@@ -2064,7 +2064,7 @@ define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
 ; AVX1-NEXT:    retq
-- 
2.50.1
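
Editor's note: a minimal reproducer sketch of the codegen this patch changes, not part of the patch itself. On an AVX1-only target, an all-zeros 256-bit integer vector previously went through the 128-bit V_SET0 widened with SUBREG_TO_REG (vxorps %xmm0, %xmm0, %xmm0); with this change isel selects AVX_SET0 directly, which expands to the 256-bit vxorps %ymm0, %ymm0, %ymm0. That is legal in AVX1 because VXORPS is a VEX.256 floating-point instruction, unlike 256-bit VPXOR, which requires AVX2. The file name and exact llc invocation below are illustrative assumptions.

; Hypothetical standalone test input, assuming:
;   llc -mtriple=x86_64-unknown-unknown -mattr=+avx zero256.ll -o -
define <8 x i32> @zero_v8i32() {
  ; All-zeros 256-bit integer build vector: selected as AVX_SET0,
  ; which expands to vxorps %ymm0, %ymm0, %ymm0 even without AVX2.
  ret <8 x i32> zeroinitializer
}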