From: Thomas Lively Date: Sat, 31 Aug 2019 00:12:29 +0000 (+0000) Subject: [WebAssembly] Add SIMD QFMA/QFMS X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fd7135a399c064399ba176e97f3386bd41b8a279;p=llvm [WebAssembly] Add SIMD QFMA/QFMS Summary: Adds clang builtins and LLVM intrinsics for these experimental instructions. They are not implemented in engines yet, but that is ok because the user must opt into using them by calling the builtins. Reviewers: aheejin, dschuff Reviewed By: aheejin Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D67020 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370556 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td index 4750c315611..73d190b0b55 100644 --- a/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/include/llvm/IR/IntrinsicsWebAssembly.td @@ -109,6 +109,14 @@ def int_wasm_alltrue : Intrinsic<[llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_qfma : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_qfms : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// // Bulk memory intrinsics diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index dd8930f079b..ff031675959 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -732,3 +732,24 @@ foreach t2 = !foldl( ) ) in def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>; + +//===----------------------------------------------------------------------===// +// Quasi-Fused Multiply- 
Add and Subtract (QFMA/QFMS) +//===----------------------------------------------------------------------===// +multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> { + defm QFMA_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), + (outs), (ins), + [(set (vec_t V128:$dst), + (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], + vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>; + defm QFMS_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), + (outs), (ins), + [(set (vec_t V128:$dst), + (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], + vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>; +} + +defm "" : SIMDQFM<v4f32, "f32x4", 0x98>; +defm "" : SIMDQFM<v2f64, "f64x2", 0xa3>; diff --git a/test/CodeGen/WebAssembly/simd-intrinsics.ll b/test/CodeGen/WebAssembly/simd-intrinsics.ll index 53c98d2722b..2077cf8c70e 100644 --- a/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -290,11 +290,35 @@ define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) { declare <4 x float> @llvm.wasm.bitselect.v4f32(<4 x float>, <4 x float>, <4 x float>) define <4 x float> @bitselect_v4f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %c) { %a = call <4 x float> @llvm.wasm.bitselect.v4f32( - <4 x float> %v1, <4 x float> %v2, <4 x float> %c + <4 x float> %v1, <4 x float> %v2, <4 x float> %c ) ret <4 x float> %a } +; CHECK-LABEL: qfma_v4f32: +; SIMD128-NEXT: .functype qfma_v4f32 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f32x4.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x float> @llvm.wasm.qfma.v4f32(<4 x float>, <4 x float>, <4 x float>) +define <4 x float> @qfma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { + %v = call <4 x float> @llvm.wasm.qfma.v4f32( + <4 x float> %a, <4 x float> %b, <4 x float> %c + ) + ret <4 x float> %v +} + +; CHECK-LABEL: qfms_v4f32: +; SIMD128-NEXT: .functype qfms_v4f32 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f32x4.qfms 
$push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x float> @llvm.wasm.qfms.v4f32(<4 x float>, <4 x float>, <4 x float>) +define <4 x float> @qfms_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { + %v = call <4 x float> @llvm.wasm.qfms.v4f32( + <4 x float> %a, <4 x float> %b, <4 x float> %c + ) + ret <4 x float> %v +} + ; ============================================================================== ; 2 x f64 ; ============================================================================== @@ -309,3 +333,27 @@ define <2 x double> @bitselect_v2f64(<2 x double> %v1, <2 x double> %v2, <2 x do ) ret <2 x double> %a } + +; CHECK-LABEL: qfma_v2f64: +; SIMD128-NEXT: .functype qfma_v2f64 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x double> @llvm.wasm.qfma.v2f64(<2 x double>, <2 x double>, <2 x double>) +define <2 x double> @qfma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { + %v = call <2 x double> @llvm.wasm.qfma.v2f64( + <2 x double> %a, <2 x double> %b, <2 x double> %c + ) + ret <2 x double> %v +} + +; CHECK-LABEL: qfms_v2f64: +; SIMD128-NEXT: .functype qfms_v2f64 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.qfms $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x double> @llvm.wasm.qfms.v2f64(<2 x double>, <2 x double>, <2 x double>) +define <2 x double> @qfms_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { + %v = call <2 x double> @llvm.wasm.qfms.v2f64( + <2 x double> %a, <2 x double> %b, <2 x double> %c + ) + ret <2 x double> %v +} diff --git a/test/MC/WebAssembly/simd-encodings.s b/test/MC/WebAssembly/simd-encodings.s index b2a39e7cce9..491b4844d7f 100644 --- a/test/MC/WebAssembly/simd-encodings.s +++ b/test/MC/WebAssembly/simd-encodings.s @@ -382,6 +382,12 @@ main: # CHECK: f32x4.sqrt # encoding: [0xfd,0x97,0x01] f32x4.sqrt + # CHECK: f32x4.qfma # 
encoding: [0xfd,0x98,0x01] + f32x4.qfma + + # CHECK: f32x4.qfms # encoding: [0xfd,0x99,0x01] + f32x4.qfms + # CHECK: f32x4.add # encoding: [0xfd,0x9a,0x01] f32x4.add @@ -409,6 +415,12 @@ main: # CHECK: f64x2.sqrt # encoding: [0xfd,0xa2,0x01] f64x2.sqrt + # CHECK: f64x2.qfma # encoding: [0xfd,0xa3,0x01] + f64x2.qfma + + # CHECK: f64x2.qfms # encoding: [0xfd,0xa4,0x01] + f64x2.qfms + # CHECK: f64x2.add # encoding: [0xfd,0xa5,0x01] f64x2.add