From: Craig Topper Date: Thu, 26 Sep 2019 22:23:09 +0000 (+0000) Subject: [X86] Add CodeGenOnly instructions for (f32 (X86selects $mask, (loadf32 addr), fp32im... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d27d2d709ec0d1d232a228ebfdd2b6381cae2f69;p=llvm [X86] Add CodeGenOnly instructions for (f32 (X86selects $mask, (loadf32 addr), fp32imm0)) to use masked MOVSS from memory. The same is done for f64 and for a non-zero passthru value. We were previously not trying to fold the load at all. Using a CodeGenOnly instruction allows us to use FR32X/FR64X as the register class to avoid a bunch of COPY_TO_REGCLASS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373021 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index e56e42001e9..f9836067214 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3958,6 +3958,18 @@ multiclass avx512_move_scalar, EVEX, EVEX_KZ, Sched<[WriteFLoad]>; + let isCodeGenOnly = 1 in { + def rmk_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), + (ins _.FRC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src), + !strconcat(asm, "\t{$src, $dst {${mask}}|", + "$dst {${mask}}, $src}"), + [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>; + def rmkz_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), + (ins _.KRCWM:$mask, _.ScalarMemOp:$src), + !strconcat(asm, "\t{$src, $dst {${mask}} {z}|", + "$dst {${mask}} {z}, $src}"), + [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>; + } } def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), @@ -4222,16 +4234,26 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; +def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), + 
(VMOVSSZrmk_alt FR32X:$src0, VK1WM:$mask, addr:$src)>; +def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), + (VMOVSSZrmkz_alt VK1WM:$mask, addr:$src)>; + def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; -def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)), +def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; +def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))), + (VMOVSDZrmk_alt FR64X:$src0, VK1WM:$mask, addr:$src)>; +def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), + (VMOVSDZrmkz_alt VK1WM:$mask, addr:$src)>; + let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll index ecdca99bea3..3f3141e8876 100644 --- a/test/CodeGen/X86/avx512-cmp.ll +++ b/test/CodeGen/X86/avx512-cmp.ll @@ -70,9 +70,8 @@ define float @test5(float %p) #0 { ; ALL-NEXT: retq ; ALL-NEXT: LBB3_1: ## %if.end ; ALL-NEXT: vcmpltss %xmm0, %xmm1, %k1 -; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; ALL-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; ALL-NEXT: vmovss {{.*}}(%rip), %xmm0 {%k1} ; ALL-NEXT: retq entry: %cmp = fcmp oeq float %p, 0.000000e+00 diff --git a/test/CodeGen/X86/pr38803.ll b/test/CodeGen/X86/pr38803.ll index a1ca27d89b0..37213424d77 100644 --- a/test/CodeGen/X86/pr38803.ll +++ b/test/CodeGen/X86/pr38803.ll @@ -13,8 +13,7 @@ define float @_Z3fn2v() { ; CHECK-NEXT: callq _Z1av ; 
CHECK-NEXT: # kill: def $al killed $al def $eax ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vmovss {{.*}}(%rip), %xmm0 {%k1} {z} ; CHECK-NEXT: cmpl $0, {{.*}}(%rip) ; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then diff --git a/test/CodeGen/X86/select-of-fp-constants.ll b/test/CodeGen/X86/select-of-fp-constants.ll index b63a71390ef..f492b255d88 100644 --- a/test/CodeGen/X86/select-of-fp-constants.ll +++ b/test/CodeGen/X86/select-of-fp-constants.ll @@ -84,10 +84,9 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone { ; ; X64_AVX512F-LABEL: fcmp_select_fp_constants: ; X64_AVX512F: # %bb.0: -; X64_AVX512F-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64_AVX512F-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %k1 ; X64_AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64_AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; X64_AVX512F-NEXT: vmovss {{.*}}(%rip), %xmm0 {%k1} ; X64_AVX512F-NEXT: retq %c = fcmp une float %x, -4.0 %r = select i1 %c, float 42.0, float 23.0