From: Craig Topper Date: Mon, 6 Nov 2017 04:04:01 +0000 (+0000) Subject: [X86] Use EVEX encoded instructions for legacy scalar sqrt intrinsics. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=67a801250fe89edb7f7b110aec210b002307c03b;p=llvm [X86] Use EVEX encoded instructions for legacy scalar sqrt intrinsics. Fixes PR35161. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317445 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 11274d988d2..2e8e0322eb4 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -7582,7 +7582,8 @@ multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, } multiclass avx512_sqrt_scalar opc, string OpcodeStr,X86VectorVTInfo _, - string SUFF, SDNode OpNode, SDNode OpNodeRnd> { + string SUFF, SDNode OpNode, SDNode OpNodeRnd, + Intrinsic Intr> { let ExeDomain = _.ExeDomain in { defm r_Int : AVX512_maskable_scalar(NAME#SUFF#Zr) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; + def : Pat<(Intr VR128X:$src), + (!cast(NAME#SUFF#Zr_Int) VR128X:$src, + VR128X:$src)>; +} + +let Predicates = [HasAVX512, OptForSize] in { def : Pat<(_.EltVT (OpNode (load addr:$src))), (!cast(NAME#SUFF#Zm) - (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>; + (_.EltVT (IMPLICIT_DEF)), addr:$src)>; + + def : Pat<(Intr (scalar_to_vector (_.EltVT (load addr:$src2)))), + (!cast(NAME#SUFF#Zm_Int) + (_.VT (IMPLICIT_DEF)), addr:$src2)>; } + } multiclass avx512_sqrt_scalar_all opc, string OpcodeStr> { defm SSZ : avx512_sqrt_scalar, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, - NotMemoryFoldable; + X86fsqrtRnds, int_x86_sse_sqrt_ss>, + EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable; defm SDZ : avx512_sqrt_scalar, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, + X86fsqrtRnds, int_x86_sse2_sqrt_sd>, + EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, NotMemoryFoldable; } diff --git a/lib/Target/X86/X86InstrSSE.td 
b/lib/Target/X86/X86InstrSSE.td
index 4314506c34f..7cb83bb8906 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3129,18 +3129,14 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
   let Predicates = [target] in {
    def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
                                (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
-  }
-  let Predicates = [HasAVX] in {
    def : Pat<(Intr VR128:$src),
              (!cast<Instruction>("V"#NAME#Suffix##r_Int) VR128:$src,
                                  VR128:$src)>;
   }
-  let Predicates = [HasAVX, OptForSize] in {
+  let Predicates = [target, OptForSize] in {
     def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
               (!cast<Instruction>("V"#NAME#Suffix##m_Int)
                     (vt (IMPLICIT_DEF)), addr:$src2)>;
-  }
-  let Predicates = [target, OptForSize] in {
     def : Pat<(ScalarVT (OpNode (load addr:$src))),
               (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
               addr:$src)>;
diff --git a/test/CodeGen/X86/sse-intrinsics-x86.ll b/test/CodeGen/X86/sse-intrinsics-x86.ll
index de02eba683b..ca74ee5732d 100644
--- a/test/CodeGen/X86/sse-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse-intrinsics-x86.ll
@@ -486,10 +486,15 @@ define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
 ; SSE-NEXT: sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0]
 ; SSE-NEXT: retl ## encoding: [0xc3]
 ;
-; VCHECK-LABEL: test_x86_sse_sqrt_ss:
-; VCHECK: ## BB#0:
-; VCHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
-; VCHECK-NEXT: retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse_sqrt_ss:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
+; AVX2-NEXT: retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse_sqrt_ss:
+; SKX: ## BB#0:
+; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
+; SKX-NEXT: retl ## encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll
b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index dcccdbfc2e6..72c68c56638 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -1613,10 +1613,15 @@ define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
 ; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
 ; SSE-NEXT: retl ## encoding: [0xc3]
 ;
-; VCHECK-LABEL: test_x86_sse2_sqrt_sd:
-; VCHECK: ## BB#0:
-; VCHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
-; VCHECK-NEXT: retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse2_sqrt_sd:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
+; AVX2-NEXT: retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse2_sqrt_sd:
+; SKX: ## BB#0:
+; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
+; SKX-NEXT: retl ## encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
@@ -1642,7 +1647,7 @@ define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
 ; SKX: ## BB#0:
 ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
 ; SKX-NEXT: vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
-; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
+; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
 ; SKX-NEXT: retl ## encoding: [0xc3]
   %a1 = load <2 x double>, <2 x double>* %a0, align 16
   %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1]