From aa6063f7615d735bf9f8590f4e7a5881328a6caa Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin
Date: Fri, 11 Oct 2019 15:53:41 +0000
Subject: [PATCH] [AArch64][SVE] Implement sdot and udot (lane) intrinsics

Summary:
Implements the following arithmetic intrinsics:
  - int_aarch64_sve_sdot
  - int_aarch64_sve_sdot_lane
  - int_aarch64_sve_udot
  - int_aarch64_sve_udot_lane

This patch includes tests for the Subdivide4Argument type added by D67549

Reviewers: sdesmalen, SjoerdMeijer, greened, rengolin, rovka

Reviewed By: sdesmalen

Subscribers: tschuett, kristof.beyls, rkruppe, psnobl, cfe-commits, llvm-commits

Differential Revision: https://reviews.llvm.org/D67551

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374566 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IR/IntrinsicsAArch64.td          | 21 +++++
 lib/Target/AArch64/AArch64InstrFormats.td     | 30 +++---
 lib/Target/AArch64/AArch64SVEInstrInfo.td     |  8 +-
 lib/Target/AArch64/SVEInstrFormats.td         | 19 ++--
 .../AArch64/sve-intrinsics-int-arith.ll       | 93 +++++++++++++++++++
 5 files changed, 149 insertions(+), 22 deletions(-)

diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index e73f5b8b2b0..e0e780aa524 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -780,6 +780,21 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                 [llvm_anyvector_ty],
                 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_DOT_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMSubdivide4VectorType<0>,
+                 LLVMSubdivide4VectorType<0>],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_DOT_Indexed_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMSubdivide4VectorType<0>,
+                 LLVMSubdivide4VectorType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
   // This class of intrinsics are not intended to be useful within LLVM IR but
   // are instead here to support some of the more regid parts of the ACLE.
   class Builtin_SVCVT
@@ -799,6 +814,12 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
 def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
 
+def int_aarch64_sve_sdot : AdvSIMD_SVE_DOT_Intrinsic;
+def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+
+def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
+def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+
 //
 // Counting bits
 //
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index af61521d4b4..f555e412330 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1011,8 +1011,8 @@ class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass {
   let RenderMethod = "addVectorIndexOperands";
 }
 
-class AsmVectorIndexOpnd<AsmOperandClass mc, code pred>
-    : Operand<i64>, ImmLeaf<i64, pred> {
+class AsmVectorIndexOpnd<ValueType ty, AsmOperandClass mc, code pred>
+    : Operand<ty>, ImmLeaf<ty, pred> {
   let ParserMatchClass = mc;
   let PrintMethod = "printVectorIndex";
 }
@@ -1023,11 +1023,17 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>;
 def VectorIndexSOperand : AsmVectorIndex<0, 3>;
 def VectorIndexDOperand : AsmVectorIndex<0, 1>;
 
-def VectorIndex1 : AsmVectorIndexOpnd<VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
-def VectorIndexB : AsmVectorIndexOpnd<VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
-def VectorIndexH : AsmVectorIndexOpnd<VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
-def VectorIndexS : AsmVectorIndexOpnd<VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
-def VectorIndexD : AsmVectorIndexOpnd<VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
+def VectorIndex1 : AsmVectorIndexOpnd<i64, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
+def VectorIndexB : AsmVectorIndexOpnd<i64, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+def VectorIndexH : AsmVectorIndexOpnd<i64, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+def VectorIndexS : AsmVectorIndexOpnd<i64, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+def VectorIndexD : AsmVectorIndexOpnd<i64, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
+
+def VectorIndex132b : AsmVectorIndexOpnd<i32, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
+def VectorIndexB32b : AsmVectorIndexOpnd<i32, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+def VectorIndexH32b : AsmVectorIndexOpnd<i32, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+def VectorIndexS32b : AsmVectorIndexOpnd<i32, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+def VectorIndexD32b : AsmVectorIndexOpnd<i32, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
 
 def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">;
 def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">;
@@ -1036,15 +1042,15 @@ def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">;
 def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">;
 
 def sve_elm_idx_extdup_b
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
 def sve_elm_idx_extdup_h
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
 def sve_elm_idx_extdup_s
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
 def sve_elm_idx_extdup_d
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
 def sve_elm_idx_extdup_q
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
 
 // 8-bit immediate for AdvSIMD where 64-bit values of the form:
 // aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 1657a76a685..5477e9f540a 100644
--- a/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -82,11 +82,11 @@ let Predicates = [HasSVE] in {
   defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">;
   defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">;
 
-  defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">;
-  defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">;
+  defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
+  defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
 
-  defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">;
-  defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">;
+  defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
+  defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
 
   defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">;
   defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">;
diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td
index e2bd47ee6ae..ec5ebfbbfad 100644
--- a/lib/Target/AArch64/SVEInstrFormats.td
+++ b/lib/Target/AArch64/SVEInstrFormats.td
@@ -2024,12 +2024,14 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
   let ElementSize = zprty1.ElementSize;
 }
 
-multiclass sve_intx_dot<bit opc, string asm> {
+multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
   def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>;
   def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>;
+
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2054,22 +2056,27 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
-  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
+                                        SDPatternOperator op> {
+  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
     bits<2> iop;
     bits<3> Zm;
     let Inst{20-19} = iop;
     let Inst{18-16} = Zm;
   }
-  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
    bits<1> iop;
    bits<4> Zm;
    let Inst{20} = iop;
    let Inst{19-16} = Zm;
  }
+
+  def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b:$idx))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
+  def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv8i16:$Op2, nxv8i16:$Op3, (i32 VectorIndexD32b:$idx))),
+            (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll b/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
index 08e475b2279..6ddd42a5819 100644
--- a/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
+++ b/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
@@ -88,6 +88,87 @@ define <vscale x 2 x i64> @neg_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg,
   ret <vscale x 2 x i64> %out
 }
 
+; SDOT
+
+define <vscale x 4 x i32> @sdot_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: sdot_i32:
+; CHECK: sdot z0.s, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32> %a,
+                                                                <vscale x 16 x i8> %b,
+                                                                <vscale x 16 x i8> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdot_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sdot_i64:
+; CHECK: sdot z0.d, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 8 x i16> %b,
+                                                                <vscale x 8 x i16> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+; SDOT (Indexed)
+
+define <vscale x 4 x i32> @sdot_lane_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: sdot_lane_i32:
+; CHECK: sdot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> %a,
+                                                                     <vscale x 16 x i8> %b,
+                                                                     <vscale x 16 x i8> %c,
+                                                                     i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdot_lane_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sdot_lane_i64:
+; CHECK: sdot z0.d, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> %a,
+                                                                     <vscale x 8 x i16> %b,
+                                                                     <vscale x 8 x i16> %c,
+                                                                     i32 1)
+  ret <vscale x 2 x i64> %out
+}
+
+; UDOT
+
+define <vscale x 4 x i32> @udot_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: udot_i32:
+; CHECK: udot z0.s, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32> %a,
+                                                                <vscale x 16 x i8> %b,
+                                                                <vscale x 16 x i8> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @udot_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: udot_i64:
+; CHECK: udot z0.d, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 8 x i16> %b,
+                                                                <vscale x 8 x i16> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+; UDOT (Indexed)
+
+define <vscale x 4 x i32> @udot_lane_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: udot_lane_i32:
+; CHECK: udot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32> %a,
+                                                                     <vscale x 16 x i8> %b,
+                                                                     <vscale x 16 x i8> %c,
+                                                                     i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
@@ -97,3 +178,15 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8>,
 declare <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
-- 
2.50.1
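
Usage note (not part of the patch above): the tests already show the calling convention, but for quick reference here is a minimal standalone IR module exercising the plain udot intrinsic. The function name @example_udot is illustrative only. Given the patterns added in SVEInstrFormats.td, llc -mtriple=aarch64-linux-gnu -mattr=+sve is expected to select a single "udot z0.s, z1.b, z2.b" for the call.

; Minimal sketch; assumes only the intrinsic declaration introduced by this patch.
define <vscale x 4 x i32> @example_udot(<vscale x 4 x i32> %acc,
                                        <vscale x 16 x i8> %a,
                                        <vscale x 16 x i8> %b) {
  ; Each 32-bit lane of %acc accumulates the sum of four adjacent
  ; unsigned i8*i8 products taken from %a and %b.
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32> %acc,
                                                              <vscale x 16 x i8> %a,
                                                              <vscale x 16 x i8> %b)
  ret <vscale x 4 x i32> %r
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)

For the _lane variants, the trailing i32 operand must be a compile-time immediate: the VectorIndexS32b and VectorIndexD32b operands defined in AArch64InstrFormats.td restrict it to 0-3 for the 32-bit form and 0-1 for the 64-bit form, matching the index ranges encodable in the instruction.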