From 8d23f77d881bfaa87a12499bbd93ce5c0392c9f6 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 29 Mar 2017 06:55:28 +0000
Subject: [PATCH] [AVX-512] Remove explicit KMOVWrk/KMOVWkr instructions from
 patterns where we can just use COPY_TO_REGCLASS instead.

This will result in a KMOVW or KMOVD being emitted during register
allocation, and in at least some cases this might allow the register
coalescer to remove the copy altogether.
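
As an illustration, here is a minimal sketch (hypothetical IR, not one
of the tests touched below) of the kind of sequence these patterns
cover: zero-extending one bit of a compare mask out of a VK1 register
into a GPR.

  define i32 @zext_mask_bit(<16 x i32> %a, <16 x i32> %b) {
    %m = icmp ugt <16 x i32> %a, %b           ; mask assigned to a VK16 register
    %bit = extractelement <16 x i1> %m, i32 0 ; single i1 lives in VK1
    %r = zext i1 %bit to i32                  ; needs a mask-to-GPR move
    ret i32 %r
  }

Previously the (i32 (zext VK1:$src)) pattern hard-coded a KMOVWrk, so
the kmovw was fixed at isel time. Now the pattern leaves a generic COPY
from VK1 to GR32: register allocation lowers whatever copy survives to
kmovw (or kmovd, as the SKX/AVX512BW test updates below show), and the
register coalescer is free to delete the copy entirely.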

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298984 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrAVX512.td          | 28 +++++------
 test/CodeGen/X86/avx512-cmp.ll            | 61 +++++++++++++++--------
 test/CodeGen/X86/avx512-ext.ll            |  2 +-
 test/CodeGen/X86/avx512-insert-extract.ll | 26 +++++-----
 test/CodeGen/X86/avx512-mask-op.ll        | 24 ++++-----
 test/CodeGen/X86/fma-fneg-combine.ll      | 24 ++++++---
 test/CodeGen/X86/masked_gather_scatter.ll | 30 +++++------
 test/CodeGen/X86/pr27591.ll               |  6 +--
 test/CodeGen/X86/pr28173.ll               | 27 +++------
 test/CodeGen/X86/pr32241.ll               |  2 +-
 test/CodeGen/X86/pr32256.ll               |  2 +-
 test/CodeGen/X86/xmulo.ll                 |  2 +-
 12 files changed, 122 insertions(+), 112 deletions(-)

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 33054e3fde5..797e5c768f5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2279,44 +2279,41 @@ let Predicates = [HasBWI] in {
 
 let Predicates = [HasAVX512] in {
   def : Pat<(i1 (trunc (i64 GR64:$src))),
-            (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
-                                        (i32 1))), VK1)>;
+            (COPY_TO_REGCLASS (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
+                                        (i32 1)), VK1)>;
 
   def : Pat<(i1 (trunc (i32 GR32:$src))),
-            (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
+            (COPY_TO_REGCLASS (AND32ri8 $src, (i32 1)), VK1)>;
 
   def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
             (COPY_TO_REGCLASS GR32:$src, VK1)>;
 
   def : Pat<(i1 (trunc (i8 GR8:$src))),
             (COPY_TO_REGCLASS
-             (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
-                                               GR8:$src, sub_8bit), (i32 1))),
-             VK1)>;
+             (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+                                      GR8:$src, sub_8bit), (i32 1)), VK1)>;
 
   def : Pat<(i1 (trunc (i16 GR16:$src))),
             (COPY_TO_REGCLASS
-             (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
-                                               GR16:$src, sub_16bit), (i32 1))),
-             VK1)>;
+             (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+                                      GR16:$src, sub_16bit), (i32 1)), VK1)>;
 
   def : Pat<(i32 (zext VK1:$src)),
-            (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
+            (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1))>;
 
   def : Pat<(i32 (anyext VK1:$src)),
             (COPY_TO_REGCLASS VK1:$src, GR32)>;
 
   def : Pat<(i8 (zext VK1:$src)),
             (EXTRACT_SUBREG
-             (AND32ri8 (KMOVWrk
-                        (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
+             (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_8bit)>;
 
   def : Pat<(i8 (anyext VK1:$src)),
             (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>;
 
   def : Pat<(i64 (zext VK1:$src)),
-            (AND64ri8 (SUBREG_TO_REG (i64 0),
-             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
+            (SUBREG_TO_REG (i64 0),
+             (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_32bit)>;
 
   def : Pat<(i64 (anyext VK1:$src)),
             (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -2324,8 +2321,7 @@ let Predicates = [HasAVX512] in {
 
   def : Pat<(i16 (zext VK1:$src)),
             (EXTRACT_SUBREG
-             (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
-             sub_16bit)>;
+             (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_16bit)>;
   def : Pat<(i16 (anyext VK1:$src)),
             (EXTRACT_SUBREG (i32
                              (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>;
diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll
index e556495bfb4..fcfb9955b5b 100644
--- a/test/CodeGen/X86/avx512-cmp.ll
+++ b/test/CodeGen/X86/avx512-cmp.ll
@@ -158,26 +158,47 @@ B:
 }
 
 define i32 @test10(i64 %b, i64 %c, i1 %d) {
-; ALL-LABEL: test10:
-; ALL:       ## BB#0:
-; ALL-NEXT:    andl $1, %edx
-; ALL-NEXT:    kmovw %edx, %k0
-; ALL-NEXT:    cmpq %rsi, %rdi
-; ALL-NEXT:    sete %al
-; ALL-NEXT:    andl $1, %eax
-; ALL-NEXT:    kmovw %eax, %k1
-; ALL-NEXT:    korw %k1, %k0, %k1
-; ALL-NEXT:    kxorw %k1, %k0, %k0
-; ALL-NEXT:    kmovw %k0, %eax
-; ALL-NEXT:    andl $1, %eax
-; ALL-NEXT:    testb %al, %al
-; ALL-NEXT:    je LBB8_1
-; ALL-NEXT:  ## BB#2: ## %if.end.i
-; ALL-NEXT:    movl $6, %eax
-; ALL-NEXT:    retq
-; ALL-NEXT:  LBB8_1: ## %if.then.i
-; ALL-NEXT:    movl $5, %eax
-; ALL-NEXT:    retq
+; KNL-LABEL: test10:
+; KNL:       ## BB#0:
+; KNL-NEXT:    andl $1, %edx
+; KNL-NEXT:    kmovw %edx, %k0
+; KNL-NEXT:    cmpq %rsi, %rdi
+; KNL-NEXT:    sete %al
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    kmovw %eax, %k1
+; KNL-NEXT:    korw %k1, %k0, %k1
+; KNL-NEXT:    kxorw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    testb %al, %al
+; KNL-NEXT:    je LBB8_1
+; KNL-NEXT:  ## BB#2: ## %if.end.i
+; KNL-NEXT:    movl $6, %eax
+; KNL-NEXT:    retq
+; KNL-NEXT:  LBB8_1: ## %if.then.i
+; KNL-NEXT:    movl $5, %eax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test10:
+; SKX:       ## BB#0:
+; SKX-NEXT:    andl $1, %edx
+; SKX-NEXT:    kmovd %edx, %k0
+; SKX-NEXT:    cmpq %rsi, %rdi
+; SKX-NEXT:    sete %al
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    kmovd %eax, %k1
+; SKX-NEXT:    korw %k1, %k0, %k1
+; SKX-NEXT:    kxorw %k1, %k0, %k0
+; SKX-NEXT:    kmovd %k0, %eax
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    je LBB8_1
+; SKX-NEXT:  ## BB#2: ## %if.end.i
+; SKX-NEXT:    movl $6, %eax
+; SKX-NEXT:    retq
+; SKX-NEXT:  LBB8_1: ## %if.then.i
+; SKX-NEXT:    movl $5, %eax
+; SKX-NEXT:    retq
   %cmp8.i = icmp eq i64 %b, %c
   %or1 = or i1 %d, %cmp8.i
   br i1 %or1, label %if.end.i, label %if.then.i
diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll
index 2f56ad4c15c..796ee83b6fa 100644
--- a/test/CodeGen/X86/avx512-ext.ll
+++ b/test/CodeGen/X86/avx512-ext.ll
@@ -1448,7 +1448,7 @@ define i16 @trunc_i32_to_i1(i32 %a) {
 ; SKX-LABEL: trunc_i32_to_i1:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    kmovw %edi, %k0
+; SKX-NEXT:    kmovd %edi, %k0
 ; SKX-NEXT:    movw $-4, %ax
 ; SKX-NEXT:    kmovd %eax, %k1
 ; SKX-NEXT:    kshiftrw $1, %k1, %k1
diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll
index 7b179570ff0..87928348a85 100644
--- a/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/test/CodeGen/X86/avx512-insert-extract.ll
@@ -275,7 +275,7 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
 ; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftlw $11, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    testb %al, %al
 ; SKX-NEXT:    je LBB10_2
@@ -317,7 +317,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
 ; SKX-NEXT:    kunpckbw %k0, %k1, %k0
 ; SKX-NEXT:    kshiftlw $15, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    testb %al, %al
 ; SKX-NEXT:    cmoveq %rsi, %rdi
@@ -351,7 +351,7 @@ define i16 @test13(i32 %a, i32 %b) {
 ; SKX-NEXT:    cmpl %esi, %edi
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    movw $-4, %ax
 ; SKX-NEXT:    kmovd %eax, %k1
 ; SKX-NEXT:    kshiftrw $1, %k1, %k1
@@ -384,7 +384,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
 ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
 ; SKX-NEXT:    kshiftlb $3, %k0, %k0
 ; SKX-NEXT:    kshiftrb $7, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    testb %al, %al
 ; SKX-NEXT:    cmoveq %rsi, %rdi
@@ -1284,7 +1284,7 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
 ; SKX-NEXT:    cmpl %esi, %edi
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    vpcmpltud %zmm2, %zmm0, %k1
 ; SKX-NEXT:    vpcmpltud %zmm3, %zmm1, %k2
 ; SKX-NEXT:    kunpckwd %k1, %k2, %k1
@@ -1350,7 +1350,7 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
 ; SKX-NEXT:    cmpl %esi, %edi
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    vpcmpltud %xmm1, %xmm0, %k1
 ; SKX-NEXT:    vpmovm2d %k1, %xmm0
 ; SKX-NEXT:    vpmovm2d %k0, %xmm1
@@ -1397,7 +1397,7 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
 ; SKX-NEXT:    cmpl %esi, %edi
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1
 ; SKX-NEXT:    kshiftlw $1, %k1, %k1
 ; SKX-NEXT:    kshiftrw $1, %k1, %k1
@@ -1431,7 +1431,7 @@ define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) {
 ; SKX-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
 ; SKX-NEXT:    kshiftlw $15, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    cmpb $1, %al
 ; SKX-NEXT:    movb $3, %al
@@ -1461,7 +1461,7 @@ define zeroext i8 @extractelement_v2i1_alt(<2 x i64> %a, <2 x i64> %b) {
 ; SKX-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
 ; SKX-NEXT:    kshiftlw $15, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    cmpb $1, %al
 ; SKX-NEXT:    movb $3, %al
@@ -1491,7 +1491,7 @@ define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; SKX-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0
 ; SKX-NEXT:    kshiftlw $12, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    retq
   %t1 = icmp ugt <4 x i32> %a, %b
@@ -1516,7 +1516,7 @@ define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) {
 ; SKX-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0
 ; SKX-NEXT:    kshiftld $29, %k0, %k0
 ; SKX-NEXT:    kshiftrd $31, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -1543,7 +1543,7 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) {
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftrq $63, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    cmpb $1, %al
 ; SKX-NEXT:    movb $3, %al
@@ -1574,7 +1574,7 @@ define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) {
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftrq $63, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    cmpb $1, %al
 ; SKX-NEXT:    movb $3, %al
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index 4e1622b3174..bd2146d8df5 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -332,7 +332,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftlw $10, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -342,7 +342,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
 ; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; AVX512BW-NEXT:    kshiftlw $10, %k0, %k0
 ; AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT:    kmovw %k0, %eax
+; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    andl $1, %eax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
@@ -378,7 +378,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftlw $10, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; SKX-NEXT:    vzeroupper
@@ -389,7 +389,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
 ; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; AVX512BW-NEXT:    kshiftlw $10, %k0, %k0
 ; AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT:    kmovw %k0, %eax
+; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    andl $1, %eax
 ; AVX512BW-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; AVX512BW-NEXT:    vzeroupper
@@ -427,7 +427,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftlw $10, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; SKX-NEXT:    vzeroupper
@@ -438,7 +438,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
 ; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; AVX512BW-NEXT:    kshiftlw $10, %k0, %k0
 ; AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT:    kmovw %k0, %eax
+; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    andl $1, %eax
 ; AVX512BW-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; AVX512BW-NEXT:    vzeroupper
@@ -1086,7 +1086,7 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
 ; SKX-NEXT:    cmpl %edx, %esi
 ; SKX-NEXT:    setg %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k1
+; SKX-NEXT:    kmovd %eax, %k1
 ; SKX-NEXT:    vpmovm2b %k1, %zmm0
 ; SKX-NEXT:    vpsllq $40, %xmm0, %xmm0
 ; SKX-NEXT:    vpmovm2b %k0, %zmm1
@@ -1104,7 +1104,7 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
 ; AVX512BW-NEXT:    cmpl %edx, %esi
 ; AVX512BW-NEXT:    setg %al
 ; AVX512BW-NEXT:    andl $1, %eax
-; AVX512BW-NEXT:    kmovw %eax, %k1
+; AVX512BW-NEXT:    kmovd %eax, %k1
 ; AVX512BW-NEXT:    vpmovm2b %k1, %zmm0
 ; AVX512BW-NEXT:    vpsllq $40, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm1
@@ -1395,7 +1395,7 @@ define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
 ; SKX-LABEL: store_v1i1:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    kmovw %edi, %k0
+; SKX-NEXT:    kmovd %edi, %k0
 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    kshiftrw $15, %k1, %k1
 ; SKX-NEXT:    kxorw %k1, %k0, %k0
@@ -1405,7 +1405,7 @@ define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
 ; AVX512BW-LABEL: store_v1i1:
 ; AVX512BW:       ## BB#0:
 ; AVX512BW-NEXT:    andl $1, %edi
-; AVX512BW-NEXT:    kmovw %edi, %k0
+; AVX512BW-NEXT:    kmovd %edi, %k0
 ; AVX512BW-NEXT:    kxnorw %k0, %k0, %k1
 ; AVX512BW-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
@@ -1629,7 +1629,7 @@ define void @f1(i32 %c) {
 ; SKX-NEXT:    movzbl {{.*}}(%rip), %edi
 ; SKX-NEXT:    movl %edi, %eax
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    kshiftrw $15, %k1, %k1
 ; SKX-NEXT:    kxorw %k1, %k0, %k0
@@ -1642,7 +1642,7 @@ define void @f1(i32 %c) {
 ; AVX512BW-NEXT:    movzbl {{.*}}(%rip), %edi
 ; AVX512BW-NEXT:    movl %edi, %eax
 ; AVX512BW-NEXT:    andl $1, %eax
-; AVX512BW-NEXT:    kmovw %eax, %k0
+; AVX512BW-NEXT:    kmovd %eax, %k0
 ; AVX512BW-NEXT:    kxnorw %k0, %k0, %k1
 ; AVX512BW-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
diff --git a/test/CodeGen/X86/fma-fneg-combine.ll b/test/CodeGen/X86/fma-fneg-combine.ll
index 6cdb3c3d6c6..bb332f7282a 100644
--- a/test/CodeGen/X86/fma-fneg-combine.ll
+++ b/test/CodeGen/X86/fma-fneg-combine.ll
@@ -142,7 +142,7 @@ define <4 x float> @test11(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 ze
 ; SKX:       # BB#0: # %entry
 ; SKX-NEXT:    vxorps {{.*}}(%rip){1to4}, %xmm2, %xmm0
 ; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vfmadd231ss %xmm1, %xmm1, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 ;
@@ -183,13 +183,21 @@ entry:
 }
 
 define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: test13:
-; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
-; CHECK-NEXT:    retq
+; SKX-LABEL: test13:
+; SKX:       # BB#0: # %entry
+; SKX-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT:    andl $1, %edi
+; SKX-NEXT:    kmovd %edi, %k1
+; SKX-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
+; SKX-NEXT:    retq
+;
+; KNL-LABEL: test13:
+; KNL:       # BB#0: # %entry
+; KNL-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT:    andl $1, %edi
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
+; KNL-NEXT:    retq
 entry:
   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
   %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
index 20c1810952b..1a15cab97e2 100644
--- a/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1691,12 +1691,12 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i
 ; KNL_32-LABEL: test_gather_16i64:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi0:
+; KNL_32-NEXT:  .Lcfi4:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi1:
+; KNL_32-NEXT:  .Lcfi5:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi2:
+; KNL_32-NEXT:  .Lcfi6:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-64, %esp
 ; KNL_32-NEXT:    subl $64, %esp
@@ -1814,12 +1814,12 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
 ; KNL_32-LABEL: test_gather_16f64:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi3:
+; KNL_32-NEXT:  .Lcfi7:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi4:
+; KNL_32-NEXT:  .Lcfi8:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi5:
+; KNL_32-NEXT:  .Lcfi9:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-64, %esp
 ; KNL_32-NEXT:    subl $64, %esp
@@ -1936,12 +1936,12 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %
 ; KNL_32-LABEL: test_scatter_16i64:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi6:
+; KNL_32-NEXT:  .Lcfi10:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi7:
+; KNL_32-NEXT:  .Lcfi11:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi8:
+; KNL_32-NEXT:  .Lcfi12:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-64, %esp
 ; KNL_32-NEXT:    subl $64, %esp
@@ -2058,12 +2058,12 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
 ; KNL_32-LABEL: test_scatter_16f64:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi9:
+; KNL_32-NEXT:  .Lcfi13:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi10:
+; KNL_32-NEXT:  .Lcfi14:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi11:
+; KNL_32-NEXT:  .Lcfi15:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-64, %esp
 ; KNL_32-NEXT:    subl $64, %esp
@@ -2139,12 +2139,12 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6
 ; KNL_32-LABEL: test_pr28312:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi12:
+; KNL_32-NEXT:  .Lcfi16:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi13:
+; KNL_32-NEXT:  .Lcfi17:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi14:
+; KNL_32-NEXT:  .Lcfi18:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-32, %esp
 ; KNL_32-NEXT:    subl $32, %esp
diff --git a/test/CodeGen/X86/pr27591.ll b/test/CodeGen/X86/pr27591.ll
index 5e9736dbb87..3ff6c096d09 100644
--- a/test/CodeGen/X86/pr27591.ll
+++ b/test/CodeGen/X86/pr27591.ll
@@ -12,7 +12,7 @@ define void @test1(i32 %x) #0 {
 ; CHECK-NEXT:    # implicit-def: %EDI
 ; CHECK-NEXT:    movb %al, %dil
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    kmovw %edi, %k0
+; CHECK-NEXT:    kmovd %edi, %k0
 ; CHECK-NEXT:    kmovd %k0, %edi
 ; CHECK-NEXT:    movb %dil, %al
 ; CHECK-NEXT:    andb $1, %al
@@ -35,8 +35,8 @@ define void @test2(i32 %x) #0 {
 ; CHECK-NEXT:    # implicit-def: %EDI
 ; CHECK-NEXT:    movb %al, %dil
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    kmovw %edi, %k0
-; CHECK-NEXT:    kmovw %k0, %edi
+; CHECK-NEXT:    kmovd %edi, %k0
+; CHECK-NEXT:    kmovd %k0, %edi
 ; CHECK-NEXT:    andl $1, %edi
 ; CHECK-NEXT:    movb %dil, %al
 ; CHECK-NEXT:    xorl %edi, %edi
diff --git a/test/CodeGen/X86/pr28173.ll b/test/CodeGen/X86/pr28173.ll
index db7d3335215..d9622b99bd9 100644
--- a/test/CodeGen/X86/pr28173.ll
+++ b/test/CodeGen/X86/pr28173.ll
@@ -5,9 +5,6 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; Note that the kmovs should really *not* appear in the output, this is an
-; artifact of the current poor lowering. This is tracked by PR28175.
-
 define i64 @foo64(i1 zeroext %i) #0 {
 ; CHECK-LABEL: foo64:
 ; CHECK:       # BB#0:
@@ -43,25 +40,13 @@ end:
   ret i16 %v
 }
 
-; This code is still not optimal
 define i16 @foo16_1(i1 zeroext %i, i32 %j) #0 {
-; KNL-LABEL: foo16_1:
-; KNL:       # BB#0:
-; KNL-NEXT:    kmovw %edi, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    orl $2, %eax
-; KNL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: foo16_1:
-; SKX:       # BB#0:
-; SKX-NEXT:    kmovd %edi, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    orl $2, %eax
-; SKX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; SKX-NEXT:    retq
+; CHECK-LABEL: foo16_1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    orl $2, %edi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    retq
   br label %bb
 
 bb:
diff --git a/test/CodeGen/X86/pr32241.ll b/test/CodeGen/X86/pr32241.ll
index 1e57a281481..d8ce230057e 100644
--- a/test/CodeGen/X86/pr32241.ll
+++ b/test/CodeGen/X86/pr32241.ll
@@ -41,7 +41,7 @@ define i32 @_Z3foov() {
 ; CHECK-NEXT:    jmp .LBB0_4
 ; CHECK-NEXT:  .LBB0_4: # %lor.end5
 ; CHECK-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # 2-byte Reload
-; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:    kmovd %k0, %eax
 ; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    movw %ax, %cx
 ; CHECK-NEXT:    movw %cx, {{[0-9]+}}(%esp)
diff --git a/test/CodeGen/X86/pr32256.ll b/test/CodeGen/X86/pr32256.ll
index 4a37dad0961..cb26c13e53e 100644
--- a/test/CodeGen/X86/pr32256.ll
+++ b/test/CodeGen/X86/pr32256.ll
@@ -16,7 +16,7 @@ define void @_Z1av() {
 ; CHECK-NEXT:    # implicit-def: %EAX
 ; CHECK-NEXT:    movb %cl, %al
 ; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    kmovd %eax, %k1
 ; CHECK-NEXT:    kmovq %k1, %k2
 ; CHECK-NEXT:    kxnorw %k0, %k0, %k3
 ; CHECK-NEXT:    kshiftrw $15, %k3, %k3
diff --git a/test/CodeGen/X86/xmulo.ll b/test/CodeGen/X86/xmulo.ll
index e0f2fc16d09..aed305058f0 100644
--- a/test/CodeGen/X86/xmulo.ll
+++ b/test/CodeGen/X86/xmulo.ll
@@ -713,10 +713,10 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
 ; KNL-LABEL: bug27873:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    andl $1, %esi
-; KNL-NEXT:    kmovw %esi, %k0
 ; KNL-NEXT:    movl $160, %ecx
 ; KNL-NEXT:    movq %rdi, %rax
 ; KNL-NEXT:    mulq %rcx
+; KNL-NEXT:    kmovw %esi, %k0
 ; KNL-NEXT:    seto %al
 ; KNL-NEXT:    andl $1, %eax
 ; KNL-NEXT:    kmovw %eax, %k1
-- 
2.40.0