From efe72cdf194d5aa0ba9be8fcd9dfa505254a6a46 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Thu, 16 May 2019 09:33:44 +0000 Subject: [PATCH] [AArch64][SVE2] Asm: implement CDOT instruction Summary: The complex DOT instructions perform a dot-product on quadtuplets from two source vectors and the resulting wide real or wide imaginary is accumulated into the destination register. The instructions come in two forms: Vector form, e.g. cdot z0.s, z1.b, z2.b, #90 - complex dot product on four 8-bit quad-tuplets, accumulating results in 32-bit elements. The complex numbers in the second source vector are rotated by 90 degrees. cdot z0.d, z1.h, z2.h, #180 - complex dot product on four 16-bit quad-tuplets, accumulating results in 64-bit elements. The complex numbers in the second source vector are rotated by 180 degrees. Indexed form, e.g. cdot z0.s, z1.b, z2.b[3], #0 - complex dot product on four 8-bit quad-tuplets, with specified quadtuplet from second source vector, accumulating results in 32-bit elements. cdot z0.d, z1.h, z2.h[1], #0 - complex dot product on four 16-bit quad-tuplets, with specified quadtuplet from second source vector, accumulating results in 64-bit elements. 
The specification can be found here: https://developer.arm.com/docs/ddi0602/latest Reviewed By: SjoerdMeijer, rovka Differential Revision: https://reviews.llvm.org/D61903 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360870 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64SVEInstrInfo.td | 6 ++ lib/Target/AArch64/SVEInstrFormats.td | 73 +++++++++++++++ test/MC/AArch64/SVE2/cdot-diagnostics.s | 103 ++++++++++++++++++++++ test/MC/AArch64/SVE2/cdot.s | 96 ++++++++++++++++++++ 4 files changed, 278 insertions(+) create mode 100644 test/MC/AArch64/SVE2/cdot-diagnostics.s create mode 100644 test/MC/AArch64/SVE2/cdot.s diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td index 76479350b5d..5d44a5a82dc 100644 --- a/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1051,4 +1051,10 @@ let Predicates = [HasSVE2] in { defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh">; defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh">; def PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>; + + // SVE2 complex integer dot product (indexed) + defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">; + + // SVE2 complex integer dot product + defm CDOT_ZZZ : sve2_cintx_dot<"cdot">; } diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td index 1c9af964887..736ddf60596 100644 --- a/lib/Target/AArch64/SVEInstrFormats.td +++ b/lib/Target/AArch64/SVEInstrFormats.td @@ -1837,6 +1837,79 @@ multiclass sve_intx_dot_by_indexed_elem { } } +//===----------------------------------------------------------------------===// +// SVE2 Complex Integer Dot Product Group +//===----------------------------------------------------------------------===// + +class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm, + complexrotateop:$rot), + asm, "\t$Zda, $Zn, $Zm, $rot", 
"", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<5> Zm; + bits<2> rot; + let Inst{31-24} = 0b01000100; + let Inst{23-22} = sz; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-12} = opc; + let Inst{11-10} = rot; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve2_cintx_dot<string asm> { + def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>; + def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>; +} + +//===----------------------------------------------------------------------===// +// SVE2 Complex Integer Dot Product - Indexed Group +//===----------------------------------------------------------------------===// + +class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2, + ZPRRegOp zprty3, Operand itype> +: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop, + complexrotateop:$rot), + asm, "\t$Zda, $Zn, $Zm$iop, $rot", "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<2> rot; + let Inst{31-24} = 0b01000100; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{15-12} = opc; + let Inst{11-10} = rot; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve2_cintx_dot_by_indexed_elem<string asm> { + def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> { + bits<2> iop; + bits<3> Zm; + let Inst{20-19} = iop; + let Inst{18-16} = Zm; + } + def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> { + bit iop; + bits<4> Zm; + let Inst{20} = iop; + let Inst{19-16} = Zm; + } +} + //===----------------------------------------------------------------------===// // SVE2 Integer Multiply - Unpredicated Group 
//===----------------------------------------------------------------------===// diff --git a/test/MC/AArch64/SVE2/cdot-diagnostics.s b/test/MC/AArch64/SVE2/cdot-diagnostics.s new file mode 100644 index 00000000000..569f27701e3 --- /dev/null +++ b/test/MC/AArch64/SVE2/cdot-diagnostics.s @@ -0,0 +1,103 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s + + +// ------------------------------------------------------------------------- // +// Invalid element size + +cdot z0.s, z1.h, z31.h, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: cdot z0.s, z1.h, z31.h, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.s, z1.s, z31.s, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: cdot z0.s, z1.s, z31.s, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.s, z1.d, z31.d, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: cdot z0.s, z1.d, z31.d, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.d, z1.b, z31.b, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: cdot z0.d, z1.b, z31.b, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.d, z1.s, z31.s, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: cdot z0.d, z1.s, z31.s, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.d, z1.d, z31.d, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: cdot z0.d, z1.d, z31.d, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +// ------------------------------------------------------------------------- // +// Invalid restricted register for indexed vector. 
+ +cdot z0.s, z1.b, z8.b[3], #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: cdot z0.s, z1.b, z8.b[3], #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.d, z1.h, z16.h[1], #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: cdot z0.d, z1.h, z16.h[1], #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +// ------------------------------------------------------------------------- // +// Invalid element index + +cdot z0.s, z1.b, z7.b[-1], #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: cdot z0.s, z1.b, z7.b[-1], #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.s, z1.b, z7.b[4], #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: cdot z0.s, z1.b, z7.b[4], #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.d, z1.h, z15.h[-1], #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. +// CHECK-NEXT: cdot z0.d, z1.h, z15.h[-1], #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.d, z1.h, z15.h[2], #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. +// CHECK-NEXT: cdot z0.d, z1.h, z15.h[2], #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +// --------------------------------------------------------------------------// +// Invalid rotation + +cdot z0.s, z1.b, z2.b[0], #360 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270. +// CHECK-NEXT: cdot z0.s, z1.b, z2.b[0], #360 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cdot z0.d, z1.h, z2.h[0], #450 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270. 
+// CHECK-NEXT: cdot z0.d, z1.h, z2.h[0], #450 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.d, p0/z, z7.d +cdot z0.d, z1.h, z31.h, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +// CHECK-NEXT: cdot z0.d, z1.h, z31.h, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.d, p0/z, z7.d +cdot z0.d, z1.h, z15.h[1], #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +// CHECK-NEXT: cdot z0.d, z1.h, z15.h[1], #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/test/MC/AArch64/SVE2/cdot.s b/test/MC/AArch64/SVE2/cdot.s new file mode 100644 index 00000000000..5f81bbbcd6f --- /dev/null +++ b/test/MC/AArch64/SVE2/cdot.s @@ -0,0 +1,96 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ +// RUN: | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +cdot z0.s, z1.b, z31.b, #0 +// CHECK-INST: cdot z0.s, z1.b, z31.b, #0 +// CHECK-ENCODING: [0x20,0x10,0x9f,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: 20 10 9f 44 + +cdot z0.d, z1.h, z31.h, #0 +// CHECK-INST: cdot z0.d, z1.h, z31.h, #0 +// CHECK-ENCODING: [0x20,0x10,0xdf,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: 20 10 df 44 + +cdot z0.d, z1.h, z31.h, #90 +// CHECK-INST: cdot z0.d, z1.h, z31.h, #90 +// 
CHECK-ENCODING: [0x20,0x14,0xdf,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: 20 14 df 44 + +cdot z0.d, z1.h, z31.h, #180 +// CHECK-INST: cdot z0.d, z1.h, z31.h, #180 +// CHECK-ENCODING: [0x20,0x18,0xdf,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: 20 18 df 44 + +cdot z0.d, z1.h, z31.h, #270 +// CHECK-INST: cdot z0.d, z1.h, z31.h, #270 +// CHECK-ENCODING: [0x20,0x1c,0xdf,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: 20 1c df 44 + +cdot z0.s, z1.b, z7.b[3], #0 +// CHECK-INST: cdot z0.s, z1.b, z7.b[3], #0 +// CHECK-ENCODING: [0x20,0x40,0xbf,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: 20 40 bf 44 + +cdot z0.d, z1.h, z15.h[1], #0 +// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0 +// CHECK-ENCODING: [0x20,0x40,0xff,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: 20 40 ff 44 + +cdot z5.d, z6.h, z3.h[0], #90 +// CHECK-INST: cdot z5.d, z6.h, z3.h[0], #90 +// CHECK-ENCODING: [0xc5,0x44,0xe3,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: c5 44 e3 44 + +cdot z29.d, z30.h, z0.h[0], #180 +// CHECK-INST: cdot z29.d, z30.h, z0.h[0], #180 +// CHECK-ENCODING: [0xdd,0x4b,0xe0,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: dd 4b e0 44 + +cdot z31.d, z30.h, z7.h[1], #270 +// CHECK-INST: cdot z31.d, z30.h, z7.h[1], #270 +// CHECK-ENCODING: [0xdf,0x4f,0xf7,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: df 4f f7 44 + + +// --------------------------------------------------------------------------// +// Test compatibility with MOVPRFX instruction. 
+ +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: e0 bc 20 04 + +cdot z0.d, z1.h, z31.h, #0 +// CHECK-INST: cdot z0.d, z1.h, z31.h, #0 +// CHECK-ENCODING: [0x20,0x10,0xdf,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: 20 10 df 44 + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: e0 bc 20 04 + +cdot z0.d, z1.h, z15.h[1], #0 +// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0 +// CHECK-ENCODING: [0x20,0x40,0xff,0x44] +// CHECK-ERROR: instruction requires: sve2 +// CHECK-UNKNOWN: 20 40 ff 44 -- 2.50.1