// file will be generated. See ARM document DUI0348B.
//
//===----------------------------------------------------------------------===//
+//
+// Each intrinsic is a subclass of the Inst class. An intrinsic can either
+// generate a __builtin_* call or it can expand to a set of generic operations.
+//
+// The operations are subclasses of Operation providing a list of DAGs, the
+// last of which is the return value. The available DAG nodes are documented
+// below.
+//
+//===----------------------------------------------------------------------===//
+
+// The base Operation class. All operations must subclass this.
+class Operation<list<dag> ops=[]> {
+ list<dag> Ops = ops;
+ bit Unavailable = 0;
+}
+// An operation that only contains a single DAG.
+class Op<dag op> : Operation<[op]>;
+// A shorter version of Operation - takes a list of DAGs. The last of these will
+// be the return value.
+class LOp<list<dag> ops> : Operation<ops>;
+
+// These defs and classes are used internally to implement the SetTheory
+// expansion and should be ignored.
+foreach Index = 0-63 in
+ def sv##Index;
+class MaskExpand;
+
+//===----------------------------------------------------------------------===//
+// Available operations
+//===----------------------------------------------------------------------===//
+
+// DAG arguments can either be operations (documented below) or variables.
+// Variables are prefixed with '$'. There are variables for each input argument,
+// with the name $pN, where N starts at zero. So the zero'th argument will be
+// $p0, the first $p1 etc.
+
+// op - Binary or unary operator, depending on the number of arguments. The
+// operator itself is just treated as a raw string and is not checked.
+// example: (op "+", $p0, $p1) -> "__p0 + __p1".
+// (op "-", $p0) -> "-__p0"
+def op;
+// call - Invoke another intrinsic. The input types are type checked and
+// disambiguated. If there is no intrinsic defined that takes
+// the given types (or if there is a type ambiguity) an error is
+// generated at tblgen time. The name of the intrinsic is the raw
+// name as given to the Inst class (not mangled).
+// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
+// (assuming $p0 has type int16x8_t).
+def call;
+// cast - Perform a cast to a different type. This gets emitted as a static
+// C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
+// "bitcast".
+//
+// The syntax is (cast MOD* VAL). The last argument is the value to
+// cast, preceded by a sequence of type modifiers. The target type
+// starts off as the type of VAL, and is modified by MOD in sequence.
+// The available modifiers are:
+// - $X - Take the type of parameter/variable X. For example:
+// (cast $p0, $p1) would cast $p1 to the type of $p0.
+// - "R" - The type of the return type.
+// - A typedef string - A NEON or stdint.h type that is then parsed.
+// for example: (cast "uint32x4_t", $p0).
+// - "U" - Make the type unsigned.
+// - "S" - Make the type signed.
+// - "H" - Halve the number of lanes in the type.
+// - "D" - Double the number of lanes in the type.
+// - "8" - Convert type to an equivalent vector of 8-bit signed
+// integers.
+// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
+// value is of type "int32x4_t").
+// (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
+// has type float64x1_t or any other vector type of 64 bits).
+// (cast "int32_t", $p2) -> "(int32_t)__p2"
+def cast;
+// bitcast - Same as "cast", except a reinterpret-cast is produced:
+// (bitcast "T", $p0) -> "*(T*)&__p0".
+// The VAL argument is saved to a temporary so it can be used
+// as an l-value.
+def bitcast;
+// dup - Take a scalar argument and create a vector by duplicating it into
+// all lanes. The type of the vector is the base type of the intrinsic.
+// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
+// is uint32x2_t).
+def dup;
+// splat - Take a vector and a lane index, and return a vector of the same type
+// containing repeated instances of the source vector at the lane index.
+// example: (splat $p0, $p1) ->
+// "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
+// (assuming __p0 has four elements).
+def splat;
+// save_temp - Create a temporary (local) variable. The variable takes a name
+// based on the zero'th parameter and can be referenced using
+// that name in subsequent DAGs in the same
+// operation. The scope of a temp is the operation. If a variable
+// with the given name already exists, an error will be given at
+// tblgen time.
+// example: [(save_temp $var, (call "foo", $p0)),
+// (op "+", $var, $p1)] ->
+// "int32x2_t __var = foo(__p0); return __var + __p1;"
+def save_temp;
+// name_replace - Return the name of the current intrinsic with the first
+// argument replaced by the second argument. Raises an error if
+// the first argument does not exist in the intrinsic name.
+// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
+// version of this intrinsic).
+def name_replace;
+// literal - Create a literal piece of code. The code is treated as a raw
+// string, and must be given a type. The type is a stdint.h or
+// NEON intrinsic type as given to (cast).
+// example: (literal "int32_t", "0")
+def literal;
+// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
+// The MASK argument is a set of elements. The elements are generated
+// from the two special defs "mask0" and "mask1". "mask0" expands to
+// the lane indices in sequence for ARG0, and "mask1" expands to
+// the lane indices in sequence for ARG1. They can be used as-is, e.g.
+//
+// (shuffle $p0, $p1, mask0) -> $p0
+// (shuffle $p0, $p1, mask1) -> $p1
+//
+// or, more usefully, they can be manipulated using the SetTheory
+// operators plus some extra operators defined in the NEON emitter.
+// The operators are described below.
+// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
+// A concatenation of the high halves of the input vectors.
+def shuffle;
+
+// add, interleave, decimate: These set operators are vanilla SetTheory
+// operators and take their normal definition.
+def add;
+def interleave;
+def decimate;
+// rotl - Rotate set left by a number of elements.
+// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
+def rotl;
+// rotr - Rotate set right by a number of elements.
+// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
+def rotr;
+// highhalf - Take only the high half of the input.
+// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
+def highhalf;
+// lowhalf - Take only the low half of the input.
+// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
+def lowhalf;
+// rev - Perform a variable-width reversal of the elements. The zero'th argument
+// is a width in bits to reverse. The lanes this maps to is determined
+// based on the element width of the underlying type.
+// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
+// example: (rev 32, mask0) -> [1, 0, 3, 2] (if 16-bit elements)
+def rev;
+// mask0 - The initial sequence of lanes for shuffle ARG0
+def mask0 : MaskExpand;
+// mask1 - The initial sequence of lanes for shuffle ARG1
+def mask1 : MaskExpand;
+
+def OP_NONE : Operation;
+def OP_UNAVAILABLE : Operation {
+ let Unavailable = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions
+//===----------------------------------------------------------------------===//
+
+// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
+// a sequence of typespecs.
+//
+// The name is the base name of the intrinsic, for example "vget_lane". This is
+// then mangled by the tblgen backend to add type information ("vget_lane_s16").
+//
+// A typespec is a sequence of uppercase characters (modifiers) followed by one
+// lowercase character. A typespec encodes a particular "base type" of the
+// intrinsic.
+//
+// An example typespec is "Qs" - quad-size short - int16x8_t. The available
+// typespec codes are given below.
+//
+// The string given to an Inst class is a sequence of typespecs. The intrinsic
+// is instantiated for every typespec in the sequence. For example "sdQsQd".
+//
+// The prototype is a string that defines the return type of the intrinsic
+// and the type of each argument. The return type and every argument gets a
+// "modifier" that can change in some way the "base type" of the intrinsic.
+//
+// The modifier 'd' means "default" and does not modify the base type in any
+// way. The available modifiers are given below.
+//
+// Typespecs
+// ---------
+// c: char
+// s: short
+// i: int
+// l: long
+// k: 128-bit long
+// f: float
+// h: half-float
+// d: double
+//
+// Typespec modifiers
+// ------------------
+// S: scalar, only used for function mangling.
+// U: unsigned
+// Q: 128b
+// H: 128b without mangling 'q'
+// P: polynomial
+//
+// Prototype modifiers
+// -------------------
+// prototype: return (arg, arg, ...)
+//
+// v: void
+// t: best-fit integer (int/poly args)
+// x: signed integer (int/float args)
+// u: unsigned integer (int/float args)
+// f: float (int args)
+// F: double (int args)
+// d: default
+// g: default, ignore 'Q' size modifier.
+// j: default, force 'Q' size modifier.
+// w: double width elements, same num elts
+// n: double width elements, half num elts
+// h: half width elements, double num elts
+// q: half width elements, quad num elts
+// e: half width elements, double num elts, unsigned
+// m: half width elements, same num elts
+// i: constant int
+// l: constant uint64
+// s: scalar of element type
+// z: scalar of half width element type, signed
+// r: scalar of double width element type, signed
+// a: scalar of element type (splat to vector type)
+// b: scalar of unsigned integer/long type (int/float args)
+// $: scalar of signed integer/long type (int/float args)
+// y: scalar of float
+// o: scalar of double
+// k: default elt width, double num elts
+// 2,3,4: array of default vectors
+// B,C,D: array of default elts, force 'Q' size modifier.
+// p: pointer type
+// c: const pointer type
-class Op;
-
-def OP_NONE : Op;
-def OP_UNAVAILABLE : Op;
-def OP_ADD : Op;
-def OP_ADDL : Op;
-def OP_ADDLHi : Op;
-def OP_ADDW : Op;
-def OP_ADDWHi : Op;
-def OP_SUB : Op;
-def OP_SUBL : Op;
-def OP_SUBLHi : Op;
-def OP_SUBW : Op;
-def OP_SUBWHi : Op;
-def OP_MUL : Op;
-def OP_MLA : Op;
-def OP_MLAL : Op;
-def OP_MULLHi : Op;
-def OP_MULLHi_P64 : Op;
-def OP_MULLHi_N : Op;
-def OP_MLALHi : Op;
-def OP_MLALHi_N : Op;
-def OP_MLS : Op;
-def OP_MLSL : Op;
-def OP_MLSLHi : Op;
-def OP_MLSLHi_N : Op;
-def OP_MUL_N : Op;
-def OP_MLA_N : Op;
-def OP_MLS_N : Op;
-def OP_FMLA_N : Op;
-def OP_FMLS_N : Op;
-def OP_MLAL_N : Op;
-def OP_MLSL_N : Op;
-def OP_MUL_LN: Op;
-def OP_MULX_LN: Op;
-def OP_MULL_LN : Op;
-def OP_MULLHi_LN : Op;
-def OP_MLA_LN: Op;
-def OP_MLS_LN: Op;
-def OP_MLAL_LN : Op;
-def OP_MLALHi_LN : Op;
-def OP_MLSL_LN : Op;
-def OP_MLSLHi_LN : Op;
-def OP_QDMULL_LN : Op;
-def OP_QDMULLHi_LN : Op;
-def OP_QDMLAL_LN : Op;
-def OP_QDMLALHi_LN : Op;
-def OP_QDMLSL_LN : Op;
-def OP_QDMLSLHi_LN : Op;
-def OP_QDMULH_LN : Op;
-def OP_QRDMULH_LN : Op;
-def OP_FMS_LN : Op;
-def OP_FMS_LNQ : Op;
-def OP_TRN1 : Op;
-def OP_ZIP1 : Op;
-def OP_UZP1 : Op;
-def OP_TRN2 : Op;
-def OP_ZIP2 : Op;
-def OP_UZP2 : Op;
-def OP_EQ : Op;
-def OP_GE : Op;
-def OP_LE : Op;
-def OP_GT : Op;
-def OP_LT : Op;
-def OP_NEG : Op;
-def OP_NOT : Op;
-def OP_AND : Op;
-def OP_OR : Op;
-def OP_XOR : Op;
-def OP_ANDN : Op;
-def OP_ORN : Op;
-def OP_CAST : Op;
-def OP_HI : Op;
-def OP_LO : Op;
-def OP_CONC : Op;
-def OP_DUP : Op;
-def OP_DUP_LN: Op;
-def OP_SEL : Op;
-def OP_REV64 : Op;
-def OP_REV32 : Op;
-def OP_REV16 : Op;
-def OP_XTN : Op;
-def OP_SQXTUN : Op;
-def OP_QXTN : Op;
-def OP_VCVT_NA_HI : Op;
-def OP_VCVT_EX_HI : Op;
-def OP_VCVTX_HI : Op;
-def OP_REINT : Op;
-def OP_ADDHNHi : Op;
-def OP_RADDHNHi : Op;
-def OP_SUBHNHi : Op;
-def OP_RSUBHNHi : Op;
-def OP_ABDL : Op;
-def OP_ABDLHi : Op;
-def OP_ABA : Op;
-def OP_ABAL : Op;
-def OP_ABALHi : Op;
-def OP_QDMULLHi : Op;
-def OP_QDMULLHi_N : Op;
-def OP_QDMLALHi : Op;
-def OP_QDMLALHi_N : Op;
-def OP_QDMLSLHi : Op;
-def OP_QDMLSLHi_N : Op;
-def OP_DIV : Op;
-def OP_LONG_HI : Op;
-def OP_NARROW_HI : Op;
-def OP_MOVL_HI : Op;
-def OP_COPY_LN : Op;
-def OP_COPYQ_LN : Op;
-def OP_COPY_LNQ : Op;
-def OP_SCALAR_MUL_LN : Op;
-def OP_SCALAR_MUL_LNQ : Op;
-def OP_SCALAR_MULX_LN : Op;
-def OP_SCALAR_MULX_LNQ : Op;
-def OP_SCALAR_VMULX_LN : Op;
-def OP_SCALAR_VMULX_LNQ : Op;
-def OP_SCALAR_QDMULL_LN : Op;
-def OP_SCALAR_QDMULL_LNQ : Op;
-def OP_SCALAR_QDMULH_LN : Op;
-def OP_SCALAR_QDMULH_LNQ : Op;
-def OP_SCALAR_QRDMULH_LN : Op;
-def OP_SCALAR_QRDMULH_LNQ : Op;
-def OP_SCALAR_GET_LN : Op;
-def OP_SCALAR_SET_LN : Op;
-
-class Inst <string n, string p, string t, Op o> {
+// Every intrinsic subclasses Inst.
+class Inst <string n, string p, string t, Operation o> {
string Name = n;
string Prototype = p;
string Types = t;
string ArchGuard = "";
- Op Operand = o;
+ Operation Operation = o;
+ bit CartesianProductOfTypes = 0;
bit isShift = 0;
bit isScalarShift = 0;
bit isScalarNarrowShift = 0;
// WOpInst: Instruction with bit size only suffix (e.g., "8").
// LOpInst: Logical instruction with no bit size suffix.
// NoTestOpInst: Intrinsic that has no corresponding instruction.
-class SOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
-class IOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
-class WOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
-class LOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
-class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
+class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
+class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
+class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
+class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
+class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
-// prototype: return (arg, arg, ...)
-// v: void
-// t: best-fit integer (int/poly args)
-// x: signed integer (int/float args)
-// u: unsigned integer (int/float args)
-// f: float (int args)
-// F: double (int args)
-// d: default
-// g: default, ignore 'Q' size modifier.
-// j: default, force 'Q' size modifier.
-// w: double width elements, same num elts
-// n: double width elements, half num elts
-// h: half width elements, double num elts
-// q: half width elements, quad num elts
-// e: half width elements, double num elts, unsigned
-// m: half width elements, same num elts
-// i: constant int
-// l: constant uint64
-// s: scalar of element type
-// z: scalar of half width element type, signed
-// r: scalar of double width element type, signed
-// a: scalar of element type (splat to vector type)
-// b: scalar of unsigned integer/long type (int/float args)
-// $: scalar of signed integer/long type (int/float args)
-// y: scalar of float
-// o: scalar of double
-// k: default elt width, double num elts
-// 2,3,4: array of default vectors
-// B,C,D: array of default elts, force 'Q' size modifier.
-// p: pointer type
-// c: const pointer type
+//===----------------------------------------------------------------------===//
+// Operations
+//===----------------------------------------------------------------------===//
-// sizes:
-// c: char
-// s: short
-// i: int
-// l: long
-// k: 128-bit long
-// f: float
-// h: half-float
-// d: double
+def OP_ADD : Op<(op "+", $p0, $p1)>;
+def OP_ADDL : Op<(op "+", (call "vmovl", $p0), (call "vmovl", $p1))>;
+def OP_ADDLHi : Op<(op "+", (call "vmovl_high", $p0),
+ (call "vmovl_high", $p1))>;
+def OP_ADDW : Op<(op "+", $p0, (call "vmovl", $p1))>;
+def OP_ADDWHi : Op<(op "+", $p0, (call "vmovl_high", $p1))>;
+def OP_SUB : Op<(op "-", $p0, $p1)>;
+def OP_SUBL : Op<(op "-", (call "vmovl", $p0), (call "vmovl", $p1))>;
+def OP_SUBLHi : Op<(op "-", (call "vmovl_high", $p0),
+ (call "vmovl_high", $p1))>;
+def OP_SUBW : Op<(op "-", $p0, (call "vmovl", $p1))>;
+def OP_SUBWHi : Op<(op "-", $p0, (call "vmovl_high", $p1))>;
+def OP_MUL : Op<(op "*", $p0, $p1)>;
+def OP_MLA : Op<(op "+", $p0, (op "*", $p1, $p2))>;
+def OP_MLAL : Op<(op "+", $p0, (call "vmull", $p1, $p2))>;
+def OP_MULLHi : Op<(call "vmull", (call "vget_high", $p0),
+ (call "vget_high", $p1))>;
+def OP_MULLHi_P64 : Op<(call "vmull",
+ (cast "poly64_t", (call "vget_high", $p0)),
+ (cast "poly64_t", (call "vget_high", $p1)))>;
+def OP_MULLHi_N : Op<(call "vmull_n", (call "vget_high", $p0), $p1)>;
+def OP_MLALHi : Op<(call "vmlal", $p0, (call "vget_high", $p1),
+ (call "vget_high", $p2))>;
+def OP_MLALHi_N : Op<(call "vmlal_n", $p0, (call "vget_high", $p1), $p2)>;
+def OP_MLS : Op<(op "-", $p0, (op "*", $p1, $p2))>;
+def OP_MLSL : Op<(op "-", $p0, (call "vmull", $p1, $p2))>;
+def OP_MLSLHi : Op<(call "vmlsl", $p0, (call "vget_high", $p1),
+ (call "vget_high", $p2))>;
+def OP_MLSLHi_N : Op<(call "vmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
+def OP_MUL_N : Op<(op "*", $p0, (dup $p1))>;
+def OP_MLA_N : Op<(op "+", $p0, (op "*", $p1, (dup $p2)))>;
+def OP_MLS_N : Op<(op "-", $p0, (op "*", $p1, (dup $p2)))>;
+def OP_FMLA_N : Op<(call "vfma", $p0, $p1, (dup $p2))>;
+def OP_FMLS_N : Op<(call "vfms", $p0, $p1, (dup $p2))>;
+def OP_MLAL_N : Op<(op "+", $p0, (call "vmull", $p1, (dup $p2)))>;
+def OP_MLSL_N : Op<(op "-", $p0, (call "vmull", $p1, (dup $p2)))>;
+def OP_MUL_LN : Op<(op "*", $p0, (splat $p1, $p2))>;
+def OP_MULX_LN : Op<(call "vmulx", $p0, (splat $p1, $p2))>;
+def OP_MULL_LN : Op<(call "vmull", $p0, (splat $p1, $p2))>;
+def OP_MULLHi_LN: Op<(call "vmull", (call "vget_high", $p0), (splat $p1, $p2))>;
+def OP_MLA_LN : Op<(op "+", $p0, (op "*", $p1, (splat $p2, $p3)))>;
+def OP_MLS_LN : Op<(op "-", $p0, (op "*", $p1, (splat $p2, $p3)))>;
+def OP_MLAL_LN : Op<(op "+", $p0, (call "vmull", $p1, (splat $p2, $p3)))>;
+def OP_MLALHi_LN: Op<(op "+", $p0, (call "vmull", (call "vget_high", $p1),
+ (splat $p2, $p3)))>;
+def OP_MLSL_LN : Op<(op "-", $p0, (call "vmull", $p1, (splat $p2, $p3)))>;
+def OP_MLSLHi_LN : Op<(op "-", $p0, (call "vmull", (call "vget_high", $p1),
+ (splat $p2, $p3)))>;
+def OP_QDMULL_LN : Op<(call "vqdmull", $p0, (splat $p1, $p2))>;
+def OP_QDMULLHi_LN : Op<(call "vqdmull", (call "vget_high", $p0),
+ (splat $p1, $p2))>;
+def OP_QDMLAL_LN : Op<(call "vqdmlal", $p0, $p1, (splat $p2, $p3))>;
+def OP_QDMLALHi_LN : Op<(call "vqdmlal", $p0, (call "vget_high", $p1),
+ (splat $p2, $p3))>;
+def OP_QDMLSL_LN : Op<(call "vqdmlsl", $p0, $p1, (splat $p2, $p3))>;
+def OP_QDMLSLHi_LN : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
+ (splat $p2, $p3))>;
+def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (splat $p1, $p2))>;
+def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (splat $p1, $p2))>;
+def OP_FMS_LN : Op<(call "vfma_lane", $p0, $p1, (op "-", $p2), $p3)>;
+def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, $p1, (op "-", $p2), $p3)>;
+def OP_TRN1 : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
+ (decimate mask1, 2)))>;
+def OP_ZIP1 : Op<(shuffle $p0, $p1, (lowhalf (interleave mask0, mask1)))>;
+def OP_UZP1 : Op<(shuffle $p0, $p1, (add (decimate mask0, 2),
+ (decimate mask1, 2)))>;
+def OP_TRN2 : Op<(shuffle $p0, $p1, (interleave
+ (decimate (rotl mask0, 1), 2),
+ (decimate (rotl mask1, 1), 2)))>;
+def OP_ZIP2 : Op<(shuffle $p0, $p1, (highhalf (interleave mask0, mask1)))>;
+def OP_UZP2 : Op<(shuffle $p0, $p1, (add (decimate (rotl mask0, 1), 2),
+ (decimate (rotl mask1, 1), 2)))>;
+def OP_EQ : Op<(cast "R", (op "==", $p0, $p1))>;
+def OP_GE : Op<(cast "R", (op ">=", $p0, $p1))>;
+def OP_LE : Op<(cast "R", (op "<=", $p0, $p1))>;
+def OP_GT : Op<(cast "R", (op ">", $p0, $p1))>;
+def OP_LT : Op<(cast "R", (op "<", $p0, $p1))>;
+def OP_NEG : Op<(op "-", $p0)>;
+def OP_NOT : Op<(op "~", $p0)>;
+def OP_AND : Op<(op "&", $p0, $p1)>;
+def OP_OR : Op<(op "|", $p0, $p1)>;
+def OP_XOR : Op<(op "^", $p0, $p1)>;
+def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>;
+def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>;
+def OP_CAST : Op<(cast "R", $p0)>;
+def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>;
+def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>;
+def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>;
+def OP_DUP : Op<(dup $p0)>;
+def OP_DUP_LN : Op<(splat $p0, $p1)>;
+def OP_SEL : Op<(cast "R", (op "|",
+ (op "&", $p0, (cast $p0, $p1)),
+ (op "&", (op "~", $p0), (cast $p0, $p2))))>;
+def OP_REV16 : Op<(shuffle $p0, $p0, (rev 16, mask0))>;
+def OP_REV32 : Op<(shuffle $p0, $p0, (rev 32, mask0))>;
+def OP_REV64 : Op<(shuffle $p0, $p0, (rev 64, mask0))>;
+def OP_XTN : Op<(call "vcombine", $p0, (call "vmovn", $p1))>;
+def OP_SQXTUN : Op<(call "vcombine", (cast $p0, "U", $p0),
+ (call "vqmovun", $p1))>;
+def OP_QXTN : Op<(call "vcombine", $p0, (call "vqmovn", $p1))>;
+def OP_VCVT_NA_HI_F16 : Op<(call "vcombine", $p0, (call "vcvt_f16", $p1))>;
+def OP_VCVT_NA_HI_F32 : Op<(call "vcombine", $p0, (call "vcvt_f32_f64", $p1))>;
+def OP_VCVT_EX_HI_F32 : Op<(call "vcvt_f32_f16", (call "vget_high", $p0))>;
+def OP_VCVT_EX_HI_F64 : Op<(call "vcvt_f64_f32", (call "vget_high", $p0))>;
+def OP_VCVTX_HI : Op<(call "vcombine", $p0, (call "vcvtx_f32", $p1))>;
+def OP_REINT : Op<(cast "R", $p0)>;
+def OP_ADDHNHi : Op<(call "vcombine", $p0, (call "vaddhn", $p1, $p2))>;
+def OP_RADDHNHi : Op<(call "vcombine", $p0, (call "vraddhn", $p1, $p2))>;
+def OP_SUBHNHi : Op<(call "vcombine", $p0, (call "vsubhn", $p1, $p2))>;
+def OP_RSUBHNHi : Op<(call "vcombine", $p0, (call "vrsubhn", $p1, $p2))>;
+def OP_ABDL : Op<(cast "R", (call "vmovl", (cast $p0, "U",
+ (call "vabd", $p0, $p1))))>;
+def OP_ABDLHi : Op<(call "vabdl", (call "vget_high", $p0),
+ (call "vget_high", $p1))>;
+def OP_ABA : Op<(op "+", $p0, (call "vabd", $p1, $p2))>;
+def OP_ABAL : Op<(op "+", $p0, (call "vabdl", $p1, $p2))>;
+def OP_ABALHi : Op<(call "vabal", $p0, (call "vget_high", $p1),
+ (call "vget_high", $p2))>;
+def OP_QDMULLHi : Op<(call "vqdmull", (call "vget_high", $p0),
+ (call "vget_high", $p1))>;
+def OP_QDMULLHi_N : Op<(call "vqdmull_n", (call "vget_high", $p0), $p1)>;
+def OP_QDMLALHi : Op<(call "vqdmlal", $p0, (call "vget_high", $p1),
+ (call "vget_high", $p2))>;
+def OP_QDMLALHi_N : Op<(call "vqdmlal_n", $p0, (call "vget_high", $p1), $p2)>;
+def OP_QDMLSLHi : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
+ (call "vget_high", $p2))>;
+def OP_QDMLSLHi_N : Op<(call "vqdmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
+def OP_DIV : Op<(op "/", $p0, $p1)>;
+def OP_LONG_HI : Op<(cast "R", (call (name_replace "_high_", "_"),
+ (call "vget_high", $p0), $p1))>;
+def OP_NARROW_HI : Op<(cast "R", (call "vcombine",
+ (cast "R", "H", $p0),
+ (cast "R", "H",
+ (call (name_replace "_high_", "_"),
+ $p1, $p2))))>;
+def OP_MOVL_HI : LOp<[(save_temp $a1, (call "vget_high", $p0)),
+ (cast "R",
+ (call "vshll_n", $a1, (literal "int32_t", "0")))]>;
+def OP_COPY_LN : Op<(call "vset_lane", (call "vget_lane", $p2, $p3), $p0, $p1)>;
+def OP_SCALAR_MUL_LN : Op<(op "*", $p0, (call "vget_lane", $p1, $p2))>;
+def OP_SCALAR_MULX_LN : Op<(call "vmulx", $p0, (call "vget_lane", $p1, $p2))>;
+def OP_SCALAR_VMULX_LN : LOp<[(save_temp $x, (call "vget_lane", $p0,
+ (literal "int32_t", "0"))),
+ (save_temp $y, (call "vget_lane", $p1, $p2)),
+ (save_temp $z, (call "vmulx", $x, $y)),
+ (call "vset_lane", $z, $p0, $p2)]>;
+def OP_SCALAR_VMULX_LNQ : LOp<[(save_temp $x, (call "vget_lane", $p0,
+ (literal "int32_t", "0"))),
+ (save_temp $y, (call "vget_lane", $p1, $p2)),
+ (save_temp $z, (call "vmulx", $x, $y)),
+ (call "vset_lane", $z, $p0, (literal "int32_t",
+ "0"))]>;
+class ScalarMulOp<string opname> :
+ Op<(call opname, $p0, (call "vget_lane", $p1, $p2))>;
+
+def OP_SCALAR_QDMULL_LN : ScalarMulOp<"vqdmull">;
+def OP_SCALAR_QDMULH_LN : ScalarMulOp<"vqdmulh">;
+def OP_SCALAR_QRDMULH_LN : ScalarMulOp<"vqrdmulh">;
+
+def OP_SCALAR_HALF_GET_LN : Op<(bitcast "float16_t",
+ (call "vget_lane",
+ (bitcast "int16x4_t", $p0), $p1))>;
+def OP_SCALAR_HALF_GET_LNQ : Op<(bitcast "float16_t",
+ (call "vget_lane",
+ (bitcast "int16x8_t", $p0), $p1))>;
+def OP_SCALAR_HALF_SET_LN : Op<(bitcast "float16x4_t",
+ (call "vset_lane",
+ (bitcast "int16_t", $p0),
+ (bitcast "int16x4_t", $p1), $p2))>;
+def OP_SCALAR_HALF_SET_LNQ : Op<(bitcast "float16x8_t",
+ (call "vset_lane",
+ (bitcast "int16_t", $p0),
+ (bitcast "int16x8_t", $p1), $p2))>;
-// size modifiers:
-// S: scalar, only used for function mangling.
-// U: unsigned
-// Q: 128b
-// H: 128b without mangling 'q'
-// P: polynomial
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
////////////////////////////////////////////////////////////////////////////////
// E.3.1 Addition
// E.3.31 Vector reinterpret cast operations
def VREINTERPRET
: NoTestOpInst<"vreinterpret", "dd",
- "csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT>;
+ "csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT> {
+ let CartesianProductOfTypes = 1;
+ let ArchGuard = "__ARM_ARCH < 8";
+}
////////////////////////////////////////////////////////////////////////////////
// Vector fused multiply-add operations
////////////////////////////////////////////////////////////////////////////////
// Converting vectors
-def VCVT_HIGH_F16 : SOpInst<"vcvt_high_f16", "qhj", "f", OP_VCVT_NA_HI>;
-def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI>;
-def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "mj", "d">;
-def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI>;
+def VCVT_HIGH_F16 : SOpInst<"vcvt_high_f16", "qhj", "f", OP_VCVT_NA_HI_F16>;
+def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI_F32>;
+def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "md", "Qd">;
+def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI_F32>;
def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "wd", "f">;
def VCVT_F64 : SInst<"vcvt_f64", "Fd", "lUlQlQUl">;
-def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI>;
+def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI_F64>;
def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">;
def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>;
def FRINTN : SInst<"vrndn", "dd", "fdQfQd">;
def COPY_LANE : IOpInst<"vcopy_lane", "ddidi",
"csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi",
- "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPYQ_LN>;
+ "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki",
- "csilPcPsPlUcUsUiUlfd", OP_COPY_LNQ>;
+ "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>;
def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi",
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
////////////////////////////////////////////////////////////////////////////////
// Set all lanes to same value
def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", "hdQhQdPlQPl", OP_DUP_LN>;
-def VDUP_LANE2: WOpInst<"vdup_laneq", "dki",
+def VDUP_LANE2: WOpInst<"vdup_laneq", "dji",
"csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
OP_DUP_LN>;
def DUP_N : WOpInst<"vdup_n", "ds", "dQdPlQPl", OP_DUP>;
// NeonEmitter implicitly takes the cartesian product of the type string with
// itself during generation so, unlike all other intrinsics, this one should
// include *all* types, not just additional ones.
-//
-// We also rely on NeonEmitter handling the 32-bit vreinterpret before the
-// 64-bit one so that the common casts don't get guarded as AArch64-only
-// (FIXME).
def VVREINTERPRET
: NoTestOpInst<"vreinterpret", "dd",
- "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT>;
-
+ "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT> {
+ let CartesianProductOfTypes = 1;
+ let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)";
+}
////////////////////////////////////////////////////////////////////////////////
// Scalar Intrinsics
// Scalar Floating Point multiply (scalar, by element)
def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "ssdi", "SfSd", OP_SCALAR_MUL_LN>;
-def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "ssji", "SfSd", OP_SCALAR_MUL_LNQ>;
+def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "ssji", "SfSd", OP_SCALAR_MUL_LN>;
// Scalar Floating Point multiply extended (scalar, by element)
def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "ssdi", "SfSd", OP_SCALAR_MULX_LN>;
-def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "ssji", "SfSd", OP_SCALAR_MULX_LNQ>;
+def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "ssji", "SfSd", OP_SCALAR_MULX_LN>;
def SCALAR_VMUL_N : IInst<"vmul_n", "dds", "d">;
// Signed Saturating Doubling Multiply Long (scalar by element)
def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "rsdi", "SsSi", OP_SCALAR_QDMULL_LN>;
-def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LNQ>;
+def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LN>;
// Signed Saturating Doubling Multiply-Add Long (scalar by element)
def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "rrsdi", "SsSi">;
// Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QDMULH_LN>;
-def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LNQ>;
+def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LN>;
// Scalar Integer Saturating Rounding Doubling Multiply Half High
def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>;
-def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LNQ>;
+def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LN>;
def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
-def SCALAR_GET_LANE : IOpInst<"vget_lane", "sdi", "hQh", OP_SCALAR_GET_LN>;
-def SCALAR_SET_LANE : IOpInst<"vset_lane", "dsdi", "hQh", OP_SCALAR_SET_LN>;
+// FIXME: Rename so it is obvious this only applies to halfs.
+def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>;
+def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>;
+def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", OP_SCALAR_HALF_GET_LNQ>;
+def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>;
}
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
-// called, rather than the normal run() entry point. A complete set of tests
-// for Neon intrinsics can be generated by calling the runTests() entry point.
+// called, rather than the normal run() entry point.
+//
+// See also the documentation in include/clang/Basic/arm_neon.td.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/SetTheory.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <algorithm>
using namespace llvm;
-enum OpKind {
- OpNone,
- OpUnavailable,
- OpAdd,
- OpAddl,
- OpAddlHi,
- OpAddw,
- OpAddwHi,
- OpSub,
- OpSubl,
- OpSublHi,
- OpSubw,
- OpSubwHi,
- OpMul,
- OpMla,
- OpMlal,
- OpMullHi,
- OpMullHiP64,
- OpMullHiN,
- OpMlalHi,
- OpMlalHiN,
- OpMls,
- OpMlsl,
- OpMlslHi,
- OpMlslHiN,
- OpMulN,
- OpMlaN,
- OpMlsN,
- OpFMlaN,
- OpFMlsN,
- OpMlalN,
- OpMlslN,
- OpMulLane,
- OpMulXLane,
- OpMullLane,
- OpMullHiLane,
- OpMlaLane,
- OpMlsLane,
- OpMlalLane,
- OpMlalHiLane,
- OpMlslLane,
- OpMlslHiLane,
- OpQDMullLane,
- OpQDMullHiLane,
- OpQDMlalLane,
- OpQDMlalHiLane,
- OpQDMlslLane,
- OpQDMlslHiLane,
- OpQDMulhLane,
- OpQRDMulhLane,
- OpFMSLane,
- OpFMSLaneQ,
- OpTrn1,
- OpZip1,
- OpUzp1,
- OpTrn2,
- OpZip2,
- OpUzp2,
- OpEq,
- OpGe,
- OpLe,
- OpGt,
- OpLt,
- OpNeg,
- OpNot,
- OpAnd,
- OpOr,
- OpXor,
- OpAndNot,
- OpOrNot,
- OpCast,
- OpConcat,
- OpDup,
- OpDupLane,
- OpHi,
- OpLo,
- OpSelect,
- OpRev16,
- OpRev32,
- OpRev64,
- OpXtnHi,
- OpSqxtunHi,
- OpQxtnHi,
- OpFcvtnHi,
- OpFcvtlHi,
- OpFcvtxnHi,
- OpReinterpret,
- OpAddhnHi,
- OpRAddhnHi,
- OpSubhnHi,
- OpRSubhnHi,
- OpAbdl,
- OpAbdlHi,
- OpAba,
- OpAbal,
- OpAbalHi,
- OpQDMullHi,
- OpQDMullHiN,
- OpQDMlalHi,
- OpQDMlalHiN,
- OpQDMlslHi,
- OpQDMlslHiN,
- OpDiv,
- OpLongHi,
- OpNarrowHi,
- OpMovlHi,
- OpCopyLane,
- OpCopyQLane,
- OpCopyLaneQ,
- OpScalarMulLane,
- OpScalarMulLaneQ,
- OpScalarMulXLane,
- OpScalarMulXLaneQ,
- OpScalarVMulXLane,
- OpScalarVMulXLaneQ,
- OpScalarQDMullLane,
- OpScalarQDMullLaneQ,
- OpScalarQDMulHiLane,
- OpScalarQDMulHiLaneQ,
- OpScalarQRDMulHiLane,
- OpScalarQRDMulHiLaneQ,
- OpScalarGetLane,
- OpScalarSetLane
-};
+namespace {
+
+// While globals are generally bad, this one allows us to perform assertions
+// liberally and somehow still trace them back to the def they indirectly
+// came from.
+static Record *CurrentRecord = nullptr;
+static void assert_with_loc(bool Assertion, const std::string &Str) {
+ if (!Assertion) {
+ if (CurrentRecord)
+ PrintFatalError(CurrentRecord->getLoc(), Str);
+ else
+ PrintFatalError(Str);
+ }
+}
enum ClassKind {
ClassNone,
- ClassI, // generic integer instruction, e.g., "i8" suffix
- ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
- ClassW, // width-specific instruction, e.g., "8" suffix
- ClassB, // bitcast arguments with enum argument to specify type
- ClassL, // Logical instructions which are op instructions
- // but we need to not emit any suffix for in our
- // tests.
- ClassNoTest // Instructions which we do not test since they are
- // not TRUE instructions.
+ ClassI, // generic integer instruction, e.g., "i8" suffix
+ ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
+ ClassW, // width-specific instruction, e.g., "8" suffix
+ ClassB, // bitcast arguments with enum argument to specify type
+ ClassL, // Logical instructions which are op instructions
+ // but we need to not emit any suffix for in our
+ // tests.
+ ClassNoTest // Instructions which we do not test since they are
+ // not TRUE instructions.
};
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins. These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
-namespace {
-class NeonTypeFlags {
- enum {
- EltTypeMask = 0xf,
- UnsignedFlag = 0x10,
- QuadFlag = 0x20
- };
- uint32_t Flags;
+namespace NeonTypeFlags {
+enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };
+
+enum EltType {
+ Int8,
+ Int16,
+ Int32,
+ Int64,
+ Poly8,
+ Poly16,
+ Poly64,
+ Poly128,
+ Float16,
+ Float32,
+ Float64
+};
+}
+
+class Intrinsic;
+class NeonEmitter;
+class Type;
+class Variable;
+//===----------------------------------------------------------------------===//
+// TypeSpec
+//===----------------------------------------------------------------------===//
+
+/// A TypeSpec is just a simple wrapper around a string, but gets its own type
+/// for strong typing purposes.
+///
+/// A TypeSpec can be used to create a type.
+class TypeSpec : public std::string {
public:
- enum EltType {
- Int8,
- Int16,
- Int32,
- Int64,
- Poly8,
- Poly16,
- Poly64,
- Poly128,
- Float16,
- Float32,
- Float64
- };
+ static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
+ std::vector<TypeSpec> Ret;
+ TypeSpec Acc;
+ for (char I : Str.str()) {
+ if (islower(I)) {
+ Acc.push_back(I);
+ Ret.push_back(TypeSpec(Acc));
+ Acc.clear();
+ } else {
+ Acc.push_back(I);
+ }
+ }
+ return Ret;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Type
+//===----------------------------------------------------------------------===//
+
+/// A Type. Not much more to say here.
+class Type {
+private:
+ TypeSpec TS;
+
+ bool Float, Signed, Void, Poly, Constant, Pointer;
+ // ScalarForMangling and NoManglingQ are really not suited to live here as
+ // they are not related to the type. But they live in the TypeSpec (not the
+ // prototype), so this is really the only place to store them.
+ bool ScalarForMangling, NoManglingQ;
+ unsigned Bitwidth, ElementBitwidth, NumVectors;
+
+public:
+ Type()
+ : Float(false), Signed(false), Void(true), Poly(false), Constant(false),
+ Pointer(false), ScalarForMangling(false), NoManglingQ(false),
+ Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
+
+ Type(TypeSpec TS, char CharMod)
+ : TS(TS), Float(false), Signed(false), Void(false), Poly(false),
+ Constant(false), Pointer(false), ScalarForMangling(false),
+ NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
+ applyModifier(CharMod);
+ }
+
+ /// Returns a type representing "void".
+ static Type getVoid() { return Type(); }
- NeonTypeFlags(unsigned F) : Flags(F) {}
- NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
- if (IsUnsigned)
- Flags |= UnsignedFlag;
- if (IsQuad)
- Flags |= QuadFlag;
+ bool operator==(const Type &Other) const { return str() == Other.str(); }
+ bool operator!=(const Type &Other) const { return !operator==(Other); }
+
+ //
+ // Query functions
+ //
+ bool isScalarForMangling() const { return ScalarForMangling; }
+ bool noManglingQ() const { return NoManglingQ; }
+
+ bool isPointer() const { return Pointer; }
+ bool isFloating() const { return Float; }
+ bool isInteger() const { return !Float && !Poly; }
+ bool isSigned() const { return Signed; }
+ bool isScalar() const { return NumVectors == 0; }
+ bool isVector() const { return NumVectors > 0; }
+ bool isFloat() const { return Float && ElementBitwidth == 32; }
+ bool isDouble() const { return Float && ElementBitwidth == 64; }
+ bool isHalf() const { return Float && ElementBitwidth == 16; }
+ bool isPoly() const { return Poly; }
+ bool isChar() const { return ElementBitwidth == 8; }
+ bool isShort() const { return !Float && ElementBitwidth == 16; }
+ bool isInt() const { return !Float && ElementBitwidth == 32; }
+ bool isLong() const { return !Float && ElementBitwidth == 64; }
+ bool isVoid() const { return Void; }
+ unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
+ unsigned getSizeInBits() const { return Bitwidth; }
+ unsigned getElementSizeInBits() const { return ElementBitwidth; }
+ unsigned getNumVectors() const { return NumVectors; }
+
+ //
+ // Mutator functions
+ //
+ void makeUnsigned() { Signed = false; }
+ void makeSigned() { Signed = true; }
+ void makeInteger(unsigned ElemWidth, bool Sign) {
+ Float = false;
+ Poly = false;
+ Signed = Sign;
+ ElementBitwidth = ElemWidth;
+ }
+ void makeScalar() {
+ Bitwidth = ElementBitwidth;
+ NumVectors = 0;
+ }
+ void makeOneVector() {
+ assert(isVector());
+ NumVectors = 1;
}
+ void doubleLanes() {
+ assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
+ Bitwidth = 128;
+ }
+ void halveLanes() {
+ assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
+ Bitwidth = 64;
+ }
+
+ /// Return the C string representation of a type, which is the typename
+ /// defined in stdint.h or arm_neon.h.
+ std::string str() const;
+
+ /// Return the string representation of a type, which is an encoded
+ /// string for passing to the BUILTIN() macro in Builtins.def.
+ std::string builtin_str() const;
- uint32_t getFlags() const { return Flags; }
+ /// Return the value in NeonTypeFlags for this type.
+ unsigned getNeonEnum() const;
+
+ /// Parse a type from a stdint.h or arm_neon.h typedef name,
+ /// for example uint32x2_t or int64_t.
+ static Type fromTypedefName(StringRef Name);
+
+private:
+ /// Creates the type based on the typespec string in TS.
+ /// Sets "Quad" to true if the "Q" or "H" modifiers were
+ /// seen. This is needed by applyModifier as some modifiers
+ /// only take effect if the type size was changed by "Q" or "H".
+ void applyTypespec(bool &Quad);
+ /// Applies a prototype modifier to the type.
+ void applyModifier(char Mod);
};
-} // end anonymous namespace
-namespace {
+//===----------------------------------------------------------------------===//
+// Variable
+//===----------------------------------------------------------------------===//
+
+/// A variable is a simple class that just has a type and a name.
+class Variable {
+ Type T;
+ std::string N;
+
+public:
+ Variable() : T(Type::getVoid()), N("") {}
+ Variable(Type T, std::string N) : T(T), N(N) {}
+
+ Type getType() const { return T; }
+ std::string getName() const { return "__" + N; }
+};
+
+//===----------------------------------------------------------------------===//
+// Intrinsic
+//===----------------------------------------------------------------------===//
+
+/// The main grunt class. This represents an instantiation of an intrinsic with
+/// a particular typespec and prototype.
+class Intrinsic {
+ /// The Record this intrinsic was created from.
+ Record *R;
+ /// The unmangled name and prototype.
+ std::string Name, Proto;
+ /// The input and output typespecs. InTS == OutTS except when
+ /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
+ TypeSpec OutTS, InTS;
+ /// The base class kind. Most intrinsics use ClassS, which has full type
+ /// info for integers (s32/u32). Some use ClassI, which doesn't care about
+ /// signedness (i32), while some (ClassB) have no type at all, only a width
+ /// (32).
+ ClassKind CK;
+ /// The list of DAGs for the body. May be empty, in which case we should
+ /// emit a builtin call.
+ ListInit *Body;
+ /// The architectural #ifdef guard.
+ std::string Guard;
+ /// Set if the Unavailable bit is 1. This means we don't generate a body,
+ /// just an "unavailable" attribute on a declaration.
+ bool IsUnavailable;
+
+ /// The types of return value [0] and parameters [1..].
+ std::vector<Type> Types;
+ /// The local variables defined.
+ std::map<std::string, Variable> Variables;
+ /// NeededEarly - set if any other intrinsic depends on this intrinsic.
+ bool NeededEarly;
+ /// UseMacro - set if we should implement using a macro or unset for a
+ /// function.
+ bool UseMacro;
+ /// The set of intrinsics that this intrinsic uses/requires.
+ std::set<Intrinsic *> Dependencies;
+ /// The "base type", which is Type('d', OutTS). InBaseType is only
+ /// different if CartesianProductOfTypes = 1 (for vreinterpret).
+ Type BaseType, InBaseType;
+ /// The return variable.
+ Variable RetVar;
+ /// A postfix to apply to every variable. Defaults to "".
+ std::string VariablePostfix;
+
+ NeonEmitter &Emitter;
+ std::stringstream OS;
+
+public:
+ Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
+ TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
+ StringRef Guard, bool IsUnavailable)
+ : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
+ CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
+ NeededEarly(false), UseMacro(false), BaseType(OutTS, 'd'),
+ InBaseType(InTS, 'd'), Emitter(Emitter) {
+ // If this builtin takes an immediate argument, we need to #define it rather
+ // than use a standard declaration, so that SemaChecking can range check
+ // the immediate passed by the user.
+ if (Proto.find('i') != std::string::npos)
+ UseMacro = true;
+
+ // Pointer arguments need to use macros to avoid hiding aligned attributes
+ // from the pointer type.
+ if (Proto.find('p') != std::string::npos ||
+ Proto.find('c') != std::string::npos)
+ UseMacro = true;
+
+ // It is not permitted to pass or return an __fp16 by value, so intrinsics
+ // taking a scalar float16_t must be implemented as macros.
+ if (OutTS.find('h') != std::string::npos &&
+ Proto.find('s') != std::string::npos)
+ UseMacro = true;
+
+ // Modify the TypeSpec per-argument to get a concrete Type, and create
+ // known variables for each.
+ // Types[0] is the return value.
+ Types.push_back(Type(OutTS, Proto[0]));
+ for (unsigned I = 1; I < Proto.size(); ++I)
+ Types.push_back(Type(InTS, Proto[I]));
+ }
+
+ /// Get the Record that this intrinsic is based off.
+ Record *getRecord() const { return R; }
+ /// Get the set of Intrinsics that this intrinsic calls.
+ /// This is the set of immediate dependencies, NOT the
+ /// transitive closure.
+ const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
+ /// Get the architectural guard string (#ifdef).
+ std::string getGuard() const { return Guard; }
+ /// Get the non-mangled name.
+ std::string getName() const { return Name; }
+
+ /// Return true if the intrinsic takes an immediate operand.
+ bool hasImmediate() const {
+ return Proto.find('i') != std::string::npos;
+ }
+ /// Return the parameter index of the immediate operand.
+ unsigned getImmediateIdx() const {
+ assert(hasImmediate());
+ unsigned Idx = Proto.find('i');
+ assert(Idx > 0 && "Can't return an immediate!");
+ return Idx - 1;
+ }
+
+ /// Return true if the intrinsic takes a splat operand.
+ bool hasSplat() const { return Proto.find('a') != std::string::npos; }
+ /// Return the parameter index of the splat operand.
+ unsigned getSplatIdx() const {
+ assert(hasSplat());
+ unsigned Idx = Proto.find('a');
+ assert(Idx > 0 && "Can't return a splat!");
+ return Idx - 1;
+ }
+
+ unsigned getNumParams() const { return Proto.size() - 1; }
+ Type getReturnType() const { return Types[0]; }
+ Type getParamType(unsigned I) const { return Types[I + 1]; }
+ Type getBaseType() const { return BaseType; }
+ /// Return the raw prototype string.
+ std::string getProto() const { return Proto; }
+
+ /// Return true if the prototype has a scalar argument.
+ /// This does not return true for the "splat" code ('a').
+ bool protoHasScalar();
+
+ /// Return the index that parameter PIndex will sit at
+ /// in a generated function call. This is often just PIndex,
+ /// but may not be as things such as multiple-vector operands
+ /// and sret parameters need to be taken into account.
+ unsigned getGeneratedParamIdx(unsigned PIndex) {
+ unsigned Idx = 0;
+ if (getReturnType().getNumVectors() > 1)
+ // Multiple vectors are passed as sret.
+ ++Idx;
+
+ for (unsigned I = 0; I < PIndex; ++I)
+ Idx += std::max(1U, getParamType(I).getNumVectors());
+
+ return Idx;
+ }
+
+ bool hasBody() const { return Body && Body->getValues().size() > 0; }
+
+ void setNeededEarly() { NeededEarly = true; }
+
+ bool operator<(const Intrinsic &Other) const {
+ // Sort lexicographically on a two-tuple (Guard, Name)
+ if (Guard != Other.Guard)
+ return Guard < Other.Guard;
+ return Name < Other.Name;
+ }
+
+ ClassKind getClassKind(bool UseClassBIfScalar = false) {
+ if (UseClassBIfScalar && !protoHasScalar())
+ return ClassB;
+ return CK;
+ }
+
+ /// Return the name, mangled with type information.
+ /// If ForceClassS is true, use ClassS (u32/s32) instead
+ /// of the intrinsic's own type class.
+ std::string getMangledName(bool ForceClassS = false);
+ /// Return the type code for a builtin function call.
+ std::string getInstTypeCode(Type T, ClassKind CK);
+ /// Return the type string for a BUILTIN() macro in Builtins.def.
+ std::string getBuiltinTypeStr();
+
+ /// Generate the intrinsic, returning code.
+ std::string generate();
+ /// Perform type checking and populate the dependency graph, but
+ /// don't generate code yet.
+ void indexBody();
+
+private:
+ std::string mangleName(std::string Name, ClassKind CK);
+
+ void initVariables();
+ std::string replaceParamsIn(std::string S);
+
+ void emitBodyAsBuiltinCall();
+ std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
+ std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
+ std::pair<Type, std::string> emitDagSplat(DagInit *DI);
+ std::pair<Type, std::string> emitDagDup(DagInit *DI);
+ std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
+ std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
+ std::pair<Type, std::string> emitDagCall(DagInit *DI);
+ std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
+ std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
+ std::pair<Type, std::string> emitDagOp(DagInit *DI);
+ std::pair<Type, std::string> emitDag(DagInit *DI);
+
+ void emitReturn();
+ void emitBody();
+ void emitShadowedArgs();
+ void emitNewLine();
+ void emitClosingBrace();
+ void emitOpeningBrace();
+ void emitPrototype();
+};
+
+//===----------------------------------------------------------------------===//
+// NeonEmitter
+//===----------------------------------------------------------------------===//
+
class NeonEmitter {
RecordKeeper &Records;
- StringMap<OpKind> OpMap;
- DenseMap<Record*, ClassKind> ClassMap;
+ DenseMap<Record *, ClassKind> ClassMap;
+ std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap;
+ unsigned UniqueNumber;
+
+ void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
+ void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
+ void genOverloadTypeCheckCode(raw_ostream &OS,
+ SmallVectorImpl<Intrinsic *> &Defs);
+ void genIntrinsicRangeCheckCode(raw_ostream &OS,
+ SmallVectorImpl<Intrinsic *> &Defs);
public:
- NeonEmitter(RecordKeeper &R) : Records(R) {
- OpMap["OP_NONE"] = OpNone;
- OpMap["OP_UNAVAILABLE"] = OpUnavailable;
- OpMap["OP_ADD"] = OpAdd;
- OpMap["OP_ADDL"] = OpAddl;
- OpMap["OP_ADDLHi"] = OpAddlHi;
- OpMap["OP_ADDW"] = OpAddw;
- OpMap["OP_ADDWHi"] = OpAddwHi;
- OpMap["OP_SUB"] = OpSub;
- OpMap["OP_SUBL"] = OpSubl;
- OpMap["OP_SUBLHi"] = OpSublHi;
- OpMap["OP_SUBW"] = OpSubw;
- OpMap["OP_SUBWHi"] = OpSubwHi;
- OpMap["OP_MUL"] = OpMul;
- OpMap["OP_MLA"] = OpMla;
- OpMap["OP_MLAL"] = OpMlal;
- OpMap["OP_MULLHi"] = OpMullHi;
- OpMap["OP_MULLHi_P64"] = OpMullHiP64;
- OpMap["OP_MULLHi_N"] = OpMullHiN;
- OpMap["OP_MLALHi"] = OpMlalHi;
- OpMap["OP_MLALHi_N"] = OpMlalHiN;
- OpMap["OP_MLS"] = OpMls;
- OpMap["OP_MLSL"] = OpMlsl;
- OpMap["OP_MLSLHi"] = OpMlslHi;
- OpMap["OP_MLSLHi_N"] = OpMlslHiN;
- OpMap["OP_MUL_N"] = OpMulN;
- OpMap["OP_MLA_N"] = OpMlaN;
- OpMap["OP_MLS_N"] = OpMlsN;
- OpMap["OP_FMLA_N"] = OpFMlaN;
- OpMap["OP_FMLS_N"] = OpFMlsN;
- OpMap["OP_MLAL_N"] = OpMlalN;
- OpMap["OP_MLSL_N"] = OpMlslN;
- OpMap["OP_MUL_LN"]= OpMulLane;
- OpMap["OP_MULX_LN"]= OpMulXLane;
- OpMap["OP_MULL_LN"] = OpMullLane;
- OpMap["OP_MULLHi_LN"] = OpMullHiLane;
- OpMap["OP_MLA_LN"]= OpMlaLane;
- OpMap["OP_MLS_LN"]= OpMlsLane;
- OpMap["OP_MLAL_LN"] = OpMlalLane;
- OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
- OpMap["OP_MLSL_LN"] = OpMlslLane;
- OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
- OpMap["OP_QDMULL_LN"] = OpQDMullLane;
- OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
- OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
- OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
- OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
- OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
- OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
- OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
- OpMap["OP_FMS_LN"] = OpFMSLane;
- OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
- OpMap["OP_TRN1"] = OpTrn1;
- OpMap["OP_ZIP1"] = OpZip1;
- OpMap["OP_UZP1"] = OpUzp1;
- OpMap["OP_TRN2"] = OpTrn2;
- OpMap["OP_ZIP2"] = OpZip2;
- OpMap["OP_UZP2"] = OpUzp2;
- OpMap["OP_EQ"] = OpEq;
- OpMap["OP_GE"] = OpGe;
- OpMap["OP_LE"] = OpLe;
- OpMap["OP_GT"] = OpGt;
- OpMap["OP_LT"] = OpLt;
- OpMap["OP_NEG"] = OpNeg;
- OpMap["OP_NOT"] = OpNot;
- OpMap["OP_AND"] = OpAnd;
- OpMap["OP_OR"] = OpOr;
- OpMap["OP_XOR"] = OpXor;
- OpMap["OP_ANDN"] = OpAndNot;
- OpMap["OP_ORN"] = OpOrNot;
- OpMap["OP_CAST"] = OpCast;
- OpMap["OP_CONC"] = OpConcat;
- OpMap["OP_HI"] = OpHi;
- OpMap["OP_LO"] = OpLo;
- OpMap["OP_DUP"] = OpDup;
- OpMap["OP_DUP_LN"] = OpDupLane;
- OpMap["OP_SEL"] = OpSelect;
- OpMap["OP_REV16"] = OpRev16;
- OpMap["OP_REV32"] = OpRev32;
- OpMap["OP_REV64"] = OpRev64;
- OpMap["OP_XTN"] = OpXtnHi;
- OpMap["OP_SQXTUN"] = OpSqxtunHi;
- OpMap["OP_QXTN"] = OpQxtnHi;
- OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
- OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
- OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
- OpMap["OP_REINT"] = OpReinterpret;
- OpMap["OP_ADDHNHi"] = OpAddhnHi;
- OpMap["OP_RADDHNHi"] = OpRAddhnHi;
- OpMap["OP_SUBHNHi"] = OpSubhnHi;
- OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
- OpMap["OP_ABDL"] = OpAbdl;
- OpMap["OP_ABDLHi"] = OpAbdlHi;
- OpMap["OP_ABA"] = OpAba;
- OpMap["OP_ABAL"] = OpAbal;
- OpMap["OP_ABALHi"] = OpAbalHi;
- OpMap["OP_QDMULLHi"] = OpQDMullHi;
- OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
- OpMap["OP_QDMLALHi"] = OpQDMlalHi;
- OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
- OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
- OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
- OpMap["OP_DIV"] = OpDiv;
- OpMap["OP_LONG_HI"] = OpLongHi;
- OpMap["OP_NARROW_HI"] = OpNarrowHi;
- OpMap["OP_MOVL_HI"] = OpMovlHi;
- OpMap["OP_COPY_LN"] = OpCopyLane;
- OpMap["OP_COPYQ_LN"] = OpCopyQLane;
- OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
- OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
- OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
- OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
- OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
- OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
- OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
- OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
- OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
- OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
- OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
- OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
- OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
- OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
- OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;
+ /// Called by Intrinsic - this attempts to get an intrinsic that takes
+ /// the given types as arguments.
+ Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types);
+ /// Called by Intrinsic - returns a globally-unique number.
+ unsigned getUniqueNumber() { return UniqueNumber++; }
+
+ NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
Record *SI = R.getClass("SInst");
Record *II = R.getClass("IInst");
Record *WI = R.getClass("WInst");
// runTests - Emit tests for all the Neon intrinsics.
void runTests(raw_ostream &o);
-
-private:
- void emitGuardedIntrinsic(raw_ostream &OS, Record *R,
- std::string &CurrentGuard, bool &InGuard,
- StringMap<ClassKind> &EmittedMap);
- void emitIntrinsic(raw_ostream &OS, Record *R,
- StringMap<ClassKind> &EmittedMap);
- void genBuiltinsDef(raw_ostream &OS);
- void genOverloadTypeCheckCode(raw_ostream &OS);
- void genIntrinsicRangeCheckCode(raw_ostream &OS);
- void genTargetTest(raw_ostream &OS);
};
+
} // end anonymous namespace
-/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
-/// which each StringRef representing a single type declared in the string.
-/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
-/// 2xfloat and 4xfloat respectively.
-static void ParseTypes(Record *r, std::string &s,
- SmallVectorImpl<StringRef> &TV) {
- const char *data = s.data();
- int len = 0;
-
- for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
- if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
- || data[len] == 'H' || data[len] == 'S')
- continue;
+//===----------------------------------------------------------------------===//
+// Type implementation
+//===----------------------------------------------------------------------===//
- switch (data[len]) {
- case 'c':
- case 's':
- case 'i':
- case 'l':
- case 'k':
- case 'h':
- case 'f':
- case 'd':
- break;
- default:
- PrintFatalError(r->getLoc(),
- "Unexpected letter: " + std::string(data + len, 1));
- }
- TV.push_back(StringRef(data, len + 1));
- data += len + 1;
- len = -1;
- }
+std::string Type::str() const {
+ if (Void)
+ return "void";
+ std::string S;
+
+ if (!Signed && isInteger())
+ S += "u";
+
+ if (Poly)
+ S += "poly";
+ else if (Float)
+ S += "float";
+ else
+ S += "int";
+
+ S += utostr(ElementBitwidth);
+ if (isVector())
+ S += "x" + utostr(getNumElements());
+ if (NumVectors > 1)
+ S += "x" + utostr(NumVectors);
+ S += "_t";
+
+ if (Constant)
+ S += " const";
+ if (Pointer)
+ S += " *";
+
+ return S;
}
-/// Widen - Convert a type code into the next wider type. char -> short,
-/// short -> int, etc.
-static char Widen(const char t) {
- switch (t) {
- case 'c':
- return 's';
- case 's':
- return 'i';
- case 'i':
- return 'l';
- case 'l':
- return 'k';
- case 'h':
- return 'f';
- case 'f':
- return 'd';
- default:
- PrintFatalError("unhandled type in widen!");
+std::string Type::builtin_str() const {
+ std::string S;
+ if (isVoid())
+ return "v";
+
+ if (Pointer)
+ // All pointers are void pointers.
+ S += "v";
+ else if (isInteger())
+ switch (ElementBitwidth) {
+ case 8: S += "c"; break;
+ case 16: S += "s"; break;
+ case 32: S += "i"; break;
+ case 64: S += "Wi"; break;
+ case 128: S += "LLLi"; break;
+ default: assert(0 && "Unhandled case!");
+ }
+ else
+ switch (ElementBitwidth) {
+ case 16: S += "h"; break;
+ case 32: S += "f"; break;
+ case 64: S += "d"; break;
+ default: assert(0 && "Unhandled case!");
+ }
+
+ if (isChar() && !Pointer)
+ // Make chars explicitly signed.
+ S = "S" + S;
+ else if (isInteger() && !Pointer && !Signed)
+ S = "U" + S;
+
+ if (isScalar()) {
+ if (Constant) S += "C";
+ if (Pointer) S += "*";
+ return S;
}
+
+ std::string Ret;
+ for (unsigned I = 0; I < NumVectors; ++I)
+ Ret += "V" + utostr(getNumElements()) + S;
+
+ return Ret;
}
-/// Narrow - Convert a type code into the next smaller type. short -> char,
-/// float -> half float, etc.
-static char Narrow(const char t) {
- switch (t) {
- case 's':
- return 'c';
- case 'i':
- return 's';
- case 'l':
- return 'i';
- case 'k':
- return 'l';
- case 'f':
- return 'h';
- case 'd':
- return 'f';
- default:
- PrintFatalError("unhandled type in narrow!");
+unsigned Type::getNeonEnum() const {
+ unsigned Addend;
+ switch (ElementBitwidth) {
+ case 8: Addend = 0; break;
+ case 16: Addend = 1; break;
+ case 32: Addend = 2; break;
+ case 64: Addend = 3; break;
+ case 128: Addend = 4; break;
+ default: assert(0 && "Unhandled element bitwidth!");
}
-}
-static std::string GetNarrowTypestr(StringRef ty)
-{
- std::string s;
- for (size_t i = 0, end = ty.size(); i < end; i++) {
- switch (ty[i]) {
- case 's':
- s += 'c';
- break;
- case 'i':
- s += 's';
- break;
- case 'l':
- s += 'i';
- break;
- case 'k':
- s += 'l';
- break;
- default:
- s += ty[i];
- break;
- }
+ unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
+ if (Poly) {
+ // Adjustment needed because Poly32 doesn't exist.
+ if (Addend >= 2)
+ --Addend;
+ Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
+ }
+ if (Float) {
+ assert(Addend != 0 && "Float8 doesn't exist!");
+ Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
}
- return s;
+ if (Bitwidth == 128)
+ Base |= (unsigned)NeonTypeFlags::QuadFlag;
+ if (isInteger() && !Signed)
+ Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
+
+ return Base;
}
-/// For a particular StringRef, return the base type code, and whether it has
-/// the quad-vector, polynomial, or unsigned modifiers set.
-static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
- unsigned off = 0;
- // ignore scalar.
- if (ty[off] == 'S') {
- ++off;
+Type Type::fromTypedefName(StringRef Name) {
+ Type T;
+ T.Void = false;
+ T.Float = false;
+ T.Poly = false;
+
+ if (Name.front() == 'u') {
+ T.Signed = false;
+ Name = Name.drop_front();
+ } else {
+ T.Signed = true;
}
- // remember quad.
- if (ty[off] == 'Q' || ty[off] == 'H') {
- quad = true;
- ++off;
+
+ if (Name.startswith("float")) {
+ T.Float = true;
+ Name = Name.drop_front(5);
+ } else if (Name.startswith("poly")) {
+ T.Poly = true;
+ Name = Name.drop_front(4);
+ } else {
+ assert(Name.startswith("int"));
+ Name = Name.drop_front(3);
}
- // remember poly.
- if (ty[off] == 'P') {
- poly = true;
- ++off;
+ unsigned I = 0;
+ for (I = 0; I < Name.size(); ++I) {
+ if (!isdigit(Name[I]))
+ break;
}
+ Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
+ Name = Name.drop_front(I);
- // remember unsigned.
- if (ty[off] == 'U') {
- usgn = true;
- ++off;
+ T.Bitwidth = T.ElementBitwidth;
+ T.NumVectors = 1;
+
+ if (Name.front() == 'x') {
+ Name = Name.drop_front();
+ unsigned I = 0;
+ for (I = 0; I < Name.size(); ++I) {
+ if (!isdigit(Name[I]))
+ break;
+ }
+ unsigned NumLanes;
+ Name.substr(0, I).getAsInteger(10, NumLanes);
+ Name = Name.drop_front(I);
+ T.Bitwidth = T.ElementBitwidth * NumLanes;
+ } else {
+ // Was scalar.
+ T.NumVectors = 0;
+ }
+ if (Name.front() == 'x') {
+ Name = Name.drop_front();
+ unsigned I = 0;
+ for (I = 0; I < Name.size(); ++I) {
+ if (!isdigit(Name[I]))
+ break;
+ }
+ Name.substr(0, I).getAsInteger(10, T.NumVectors);
+ Name = Name.drop_front(I);
}
- // base type to get the type string for.
- return ty[off];
+ assert(Name.startswith("_t") && "Malformed typedef!");
+ return T;
}
-/// ModType - Transform a type code and its modifiers based on a mod code. The
-/// mod code definitions may be found at the top of arm_neon.td.
-static char ModType(const char mod, char type, bool &quad, bool &poly,
- bool &usgn, bool &scal, bool &cnst, bool &pntr) {
- switch (mod) {
- case 't':
- if (poly) {
- poly = false;
- usgn = true;
- }
- break;
- case 'b':
- scal = true;
- case 'u':
- usgn = true;
- poly = false;
- if (type == 'f')
- type = 'i';
- if (type == 'd')
- type = 'l';
- break;
- case '$':
- scal = true;
- case 'x':
- usgn = false;
- poly = false;
- if (type == 'f')
- type = 'i';
- if (type == 'd')
- type = 'l';
- break;
- case 'o':
- scal = true;
- type = 'd';
- usgn = false;
- break;
- case 'y':
- scal = true;
- case 'f':
- if (type == 'h')
- quad = true;
- type = 'f';
- usgn = false;
- break;
- case 'F':
- type = 'd';
- usgn = false;
- break;
- case 'g':
- quad = false;
- break;
- case 'B':
- case 'C':
- case 'D':
- case 'j':
- quad = true;
- break;
- case 'w':
- type = Widen(type);
- quad = true;
- break;
- case 'n':
- type = Widen(type);
- break;
- case 'i':
- type = 'i';
- scal = true;
- break;
- case 'l':
- type = 'l';
- scal = true;
- usgn = true;
+void Type::applyTypespec(bool &Quad) {
+ std::string S = TS;
+ ScalarForMangling = false;
+ Void = false;
+ Poly = Float = false;
+ ElementBitwidth = ~0U;
+ Signed = true;
+ NumVectors = 1;
+
+ for (char I : S) {
+ switch (I) {
+ case 'S':
+ ScalarForMangling = true;
break;
- case 'z':
- type = Narrow(type);
- scal = true;
+ case 'H':
+ NoManglingQ = true;
+ Quad = true;
break;
- case 'r':
- type = Widen(type);
- scal = true;
+ case 'Q':
+ Quad = true;
break;
- case 's':
- case 'a':
- scal = true;
+ case 'P':
+ Poly = true;
break;
- case 'k':
- quad = true;
+ case 'U':
+ Signed = false;
break;
case 'c':
- cnst = true;
- case 'p':
- pntr = true;
- scal = true;
+ ElementBitwidth = 8;
break;
case 'h':
- type = Narrow(type);
- if (type == 'h')
- quad = false;
- break;
- case 'q':
- type = Narrow(type);
- quad = true;
- break;
- case 'e':
- type = Narrow(type);
- usgn = true;
- break;
- case 'm':
- type = Narrow(type);
- quad = false;
- break;
- default:
- break;
- }
- return type;
-}
-
-static bool IsMultiVecProto(const char p) {
- return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
-}
-
-/// TypeString - for a modifier and type, generate the name of the typedef for
-/// that type. QUc -> uint8x8_t.
-static std::string TypeString(const char mod, StringRef typestr) {
- bool quad = false;
- bool poly = false;
- bool usgn = false;
- bool scal = false;
- bool cnst = false;
- bool pntr = false;
-
- if (mod == 'v')
- return "void";
- if (mod == 'i')
- return "int";
-
- // base type to get the type string for.
- char type = ClassifyType(typestr, quad, poly, usgn);
-
- // Based on the modifying character, change the type and width if necessary.
- type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
-
- SmallString<128> s;
-
- if (usgn)
- s.push_back('u');
-
- switch (type) {
- case 'c':
- s += poly ? "poly8" : "int8";
- if (scal)
- break;
- s += quad ? "x16" : "x8";
- break;
+ Float = true;
+ // Fall through
case 's':
- s += poly ? "poly16" : "int16";
- if (scal)
- break;
- s += quad ? "x8" : "x4";
+ ElementBitwidth = 16;
break;
+ case 'f':
+ Float = true;
+ // Fall through
case 'i':
- s += "int32";
- if (scal)
- break;
- s += quad ? "x4" : "x2";
+ ElementBitwidth = 32;
break;
+ case 'd':
+ Float = true;
+ // Fall through
case 'l':
- s += (poly && !usgn)? "poly64" : "int64";
- if (scal)
- break;
- s += quad ? "x2" : "x1";
+ ElementBitwidth = 64;
break;
case 'k':
- s += "poly128";
- break;
- case 'h':
- s += "float16";
- if (scal)
- break;
- s += quad ? "x8" : "x4";
- break;
- case 'f':
- s += "float32";
- if (scal)
- break;
- s += quad ? "x4" : "x2";
- break;
- case 'd':
- s += "float64";
- if (scal)
- break;
- s += quad ? "x2" : "x1";
+ ElementBitwidth = 128;
+ // Poly doesn't have a 128x1 type.
+ if (Poly)
+ NumVectors = 0;
break;
-
default:
- PrintFatalError("unhandled type!");
+ assert(0 && "Unhandled type code!");
+ }
}
+ assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
- if (mod == '2' || mod == 'B')
- s += "x2";
- if (mod == '3' || mod == 'C')
- s += "x3";
- if (mod == '4' || mod == 'D')
- s += "x4";
-
- // Append _t, finishing the type string typedef type.
- s += "_t";
-
- if (cnst)
- s += " const";
-
- if (pntr)
- s += " *";
-
- return s.str();
+ Bitwidth = Quad ? 128 : 64;
}
-/// BuiltinTypeString - for a modifier and type, generate the clang
-/// BuiltinsARM.def prototype code for the function. See the top of clang's
-/// Builtins.def for a description of the type strings.
-static std::string BuiltinTypeString(const char mod, StringRef typestr,
- ClassKind ck, bool ret) {
- bool quad = false;
- bool poly = false;
- bool usgn = false;
- bool scal = false;
- bool cnst = false;
- bool pntr = false;
-
- if (mod == 'v')
- return "v"; // void
- if (mod == 'i')
- return "i"; // int
-
- // base type to get the type string for.
- char type = ClassifyType(typestr, quad, poly, usgn);
-
- // Based on the modifying character, change the type and width if necessary.
- type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
-
- usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
- scal && type != 'f' && type != 'd');
-
- // All pointers are void* pointers. Change type to 'v' now.
- if (pntr) {
- usgn = false;
- poly = false;
- type = 'v';
- }
- // Treat half-float ('h') types as unsigned short ('s') types.
- if (type == 'h') {
- type = 's';
- usgn = true;
- }
-
- if (scal) {
- SmallString<128> s;
-
- if (usgn)
- s.push_back('U');
- else if (type == 'c')
- s.push_back('S'); // make chars explicitly signed
-
- if (type == 'l') // 64-bit long
- s += "Wi";
- else if (type == 'k') // 128-bit long
- s = "LLLi";
- else
- s.push_back(type);
-
- if (cnst)
- s.push_back('C');
- if (pntr)
- s.push_back('*');
- return s.str();
- }
-
- // Since the return value must be one type, return a vector type of the
- // appropriate width which we will bitcast. An exception is made for
- // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
- // fashion, storing them to a pointer arg.
- if (ret) {
- if (IsMultiVecProto(mod))
- return "vv*"; // void result with void* first argument
- if (mod == 'f' || (ck != ClassB && type == 'f'))
- return quad ? "V4f" : "V2f";
- if (mod == 'F' || (ck != ClassB && type == 'd'))
- return quad ? "V2d" : "V1d";
- if (ck != ClassB && type == 's')
- return quad ? "V8s" : "V4s";
- if (ck != ClassB && type == 'i')
- return quad ? "V4i" : "V2i";
- if (ck != ClassB && type == 'l')
- return quad ? "V2Wi" : "V1Wi";
-
- return quad ? "V16Sc" : "V8Sc";
- }
-
- // Non-return array types are passed as individual vectors.
- if (mod == '2' || mod == 'B')
- return quad ? "V16ScV16Sc" : "V8ScV8Sc";
- if (mod == '3' || mod == 'C')
- return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
- if (mod == '4' || mod == 'D')
- return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
-
- if (mod == 'f' || (ck != ClassB && type == 'f'))
- return quad ? "V4f" : "V2f";
- if (mod == 'F' || (ck != ClassB && type == 'd'))
- return quad ? "V2d" : "V1d";
- if (ck != ClassB && type == 's')
- return quad ? "V8s" : "V4s";
- if (ck != ClassB && type == 'i')
- return quad ? "V4i" : "V2i";
- if (ck != ClassB && type == 'l')
- return quad ? "V2Wi" : "V1Wi";
-
- return quad ? "V16Sc" : "V8Sc";
-}
+void Type::applyModifier(char Mod) {
+ bool AppliedQuad = false;
+ applyTypespec(AppliedQuad);
-/// InstructionTypeCode - Computes the ARM argument character code and
-/// quad status for a specific type string and ClassKind.
-static void InstructionTypeCode(const StringRef &typeStr,
- const ClassKind ck,
- bool &quad,
- std::string &typeCode) {
- bool poly = false;
- bool usgn = false;
- char type = ClassifyType(typeStr, quad, poly, usgn);
-
- switch (type) {
- case 'c':
- switch (ck) {
- case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
- case ClassI: typeCode = "i8"; break;
- case ClassW: typeCode = "8"; break;
- default: break;
- }
+ switch (Mod) {
+ case 'v':
+ Void = true;
break;
- case 's':
- switch (ck) {
- case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
- case ClassI: typeCode = "i16"; break;
- case ClassW: typeCode = "16"; break;
- default: break;
+ case 't':
+ if (Poly) {
+ Poly = false;
+ Signed = false;
}
break;
+ case 'b':
+ Signed = false;
+ Float = false;
+ Poly = false;
+ NumVectors = 0;
+ Bitwidth = ElementBitwidth;
+ break;
+ case '$':
+ Signed = true;
+ Float = false;
+ Poly = false;
+ NumVectors = 0;
+ Bitwidth = ElementBitwidth;
+ break;
+ case 'u':
+ Signed = false;
+ Poly = false;
+ Float = false;
+ break;
+ case 'x':
+ Signed = true;
+ assert(!Poly && "'u' can't be used with poly types!");
+ Float = false;
+ break;
+ case 'o':
+ Bitwidth = ElementBitwidth = 64;
+ NumVectors = 0;
+ Float = true;
+ break;
+ case 'y':
+ Bitwidth = ElementBitwidth = 32;
+ NumVectors = 0;
+ Float = true;
+ break;
+ case 'f':
+ // Special case - if we're half-precision, a floating
+ // point argument needs to be 128 bits (double the size).
+ if (isHalf())
+ Bitwidth = 128;
+ Float = true;
+ ElementBitwidth = 32;
+ break;
+ case 'F':
+ Float = true;
+ ElementBitwidth = 64;
+ break;
+ case 'g':
+ if (AppliedQuad)
+ Bitwidth /= 2;
+ break;
+ case 'j':
+ if (!AppliedQuad)
+ Bitwidth *= 2;
+ break;
+ case 'w':
+ ElementBitwidth *= 2;
+ Bitwidth *= 2;
+ break;
+ case 'n':
+ ElementBitwidth *= 2;
+ break;
case 'i':
- switch (ck) {
- case ClassS: typeCode = usgn ? "u32" : "s32"; break;
- case ClassI: typeCode = "i32"; break;
- case ClassW: typeCode = "32"; break;
- default: break;
- }
+ Float = false;
+ Poly = false;
+ ElementBitwidth = Bitwidth = 32;
+ NumVectors = 0;
+ Signed = true;
break;
case 'l':
- switch (ck) {
- case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
- case ClassI: typeCode = "i64"; break;
- case ClassW: typeCode = "64"; break;
- default: break;
- }
+ Float = false;
+ Poly = false;
+ ElementBitwidth = Bitwidth = 64;
+ NumVectors = 0;
+ Signed = false;
+ break;
+ case 'z':
+ ElementBitwidth /= 2;
+ Bitwidth = ElementBitwidth;
+ NumVectors = 0;
+ break;
+ case 'r':
+ ElementBitwidth *= 2;
+ Bitwidth = ElementBitwidth;
+ NumVectors = 0;
+ break;
+ case 's':
+ case 'a':
+ Bitwidth = ElementBitwidth;
+ NumVectors = 0;
break;
case 'k':
- assert(poly && "Unrecognized 128 bit integer.");
- typeCode = "p128";
+ Bitwidth *= 2;
+ break;
+ case 'c':
+ Constant = true;
+ // Fall through
+ case 'p':
+ Pointer = true;
+ Bitwidth = ElementBitwidth;
+ NumVectors = 0;
break;
case 'h':
- switch (ck) {
- case ClassS:
- case ClassI: typeCode = "f16"; break;
- case ClassW: typeCode = "16"; break;
- default: break;
- }
+ ElementBitwidth /= 2;
break;
- case 'f':
- switch (ck) {
- case ClassS:
- case ClassI: typeCode = "f32"; break;
- case ClassW: typeCode = "32"; break;
- default: break;
- }
+ case 'q':
+ ElementBitwidth /= 2;
+ Bitwidth *= 2;
+ break;
+ case 'e':
+ ElementBitwidth /= 2;
+ Signed = false;
+ break;
+ case 'm':
+ ElementBitwidth /= 2;
+ Bitwidth /= 2;
break;
case 'd':
- switch (ck) {
- case ClassS:
- case ClassI:
- typeCode += "f64";
- break;
- case ClassW:
- PrintFatalError("unhandled type!");
- default:
- break;
- }
+ break;
+ case '2':
+ NumVectors = 2;
+ break;
+ case '3':
+ NumVectors = 3;
+ break;
+ case '4':
+ NumVectors = 4;
+ break;
+ case 'B':
+ NumVectors = 2;
+ if (!AppliedQuad)
+ Bitwidth *= 2;
+ break;
+ case 'C':
+ NumVectors = 3;
+ if (!AppliedQuad)
+ Bitwidth *= 2;
+ break;
+ case 'D':
+ NumVectors = 4;
+ if (!AppliedQuad)
+ Bitwidth *= 2;
break;
default:
- PrintFatalError("unhandled type!");
+ assert(0 && "Unhandled character!");
}
}
-static char Insert_BHSD_Suffix(StringRef typestr){
- unsigned off = 0;
- if(typestr[off++] == 'S'){
- while(typestr[off] == 'Q' || typestr[off] == 'H'||
- typestr[off] == 'P' || typestr[off] == 'U')
- ++off;
- switch (typestr[off]){
- default : break;
- case 'c' : return 'b';
- case 's' : return 'h';
- case 'i' :
- case 'f' : return 's';
- case 'l' :
- case 'd' : return 'd';
+//===----------------------------------------------------------------------===//
+// Intrinsic implementation
+//===----------------------------------------------------------------------===//
+
+std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) {
+ char typeCode = '\0';
+ bool printNumber = true;
+
+ if (CK == ClassB)
+ return "";
+
+ if (T.isPoly())
+ typeCode = 'p';
+ else if (T.isInteger())
+ typeCode = T.isSigned() ? 's' : 'u';
+ else
+ typeCode = 'f';
+
+ if (CK == ClassI) {
+ switch (typeCode) {
+ default:
+ break;
+ case 's':
+ case 'u':
+ case 'p':
+ typeCode = 'i';
+ break;
}
}
- return 0;
-}
-
-static bool endsWith_xN(std::string const &name) {
- if (name.length() > 3) {
- if (name.compare(name.length() - 3, 3, "_x2") == 0 ||
- name.compare(name.length() - 3, 3, "_x3") == 0 ||
- name.compare(name.length() - 3, 3, "_x4") == 0)
- return true;
+ if (CK == ClassB) {
+ typeCode = '\0';
}
- return false;
+
+ std::string S;
+ if (typeCode != '\0')
+ S.push_back(typeCode);
+ if (printNumber)
+ S += utostr(T.getElementSizeInBits());
+
+ return S;
}
-/// MangleName - Append a type or width suffix to a base neon function name,
-/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
-/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
-/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
-static std::string MangleName(const std::string &name, StringRef typestr,
- ClassKind ck) {
- if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
- name == "vcvt_f64_f32")
- return name;
+std::string Intrinsic::getBuiltinTypeStr() {
+ ClassKind LocalCK = getClassKind(true);
+ std::string S;
- bool quad = false;
- std::string typeCode = "";
+ Type RetT = getReturnType();
+ if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
+ !RetT.isFloating())
+ RetT.makeInteger(RetT.getElementSizeInBits(), false);
- InstructionTypeCode(typestr, ck, quad, typeCode);
+ // Since the return value must be one type, return a vector type of the
+ // appropriate width which we will bitcast. An exception is made for
+ // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
+ // fashion, storing them to a pointer arg.
+ if (RetT.getNumVectors() > 1) {
+ S += "vv*"; // void result with void* first argument
+ } else {
+ if (RetT.isPoly())
+ RetT.makeInteger(RetT.getElementSizeInBits(), false);
+ if (!RetT.isScalar() && !RetT.isSigned())
+ RetT.makeSigned();
- std::string s = name;
+ bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f';
+ if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType)
+ // Cast to vector of 8-bit elements.
+ RetT.makeInteger(8, true);
- if (typeCode.size() > 0) {
- // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
- if (endsWith_xN(s))
- s.insert(s.length() - 3, "_" + typeCode);
- else
- s += "_" + typeCode;
+ S += RetT.builtin_str();
}
- if (ck == ClassB)
- s += "_v";
+ for (unsigned I = 0; I < getNumParams(); ++I) {
+ Type T = getParamType(I);
+ if (T.isPoly())
+ T.makeInteger(T.getElementSizeInBits(), false);
- // Insert a 'q' before the first '_' character so that it ends up before
- // _lane or _n on vector-scalar operations.
- if (typestr.find("Q") != StringRef::npos) {
- size_t pos = s.find('_');
- s = s.insert(pos, "q");
- }
- char ins = Insert_BHSD_Suffix(typestr);
- if(ins){
- size_t pos = s.find('_');
- s = s.insert(pos, &ins, 1);
- }
+ bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f';
+ if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType)
+ T.makeInteger(8, true);
+ // Halves always get converted to 8-bit elements.
+ if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
+ T.makeInteger(8, true);
- return s;
-}
-
-static void PreprocessInstruction(const StringRef &Name,
- const std::string &InstName,
- std::string &Prefix,
- bool &HasNPostfix,
- bool &HasLanePostfix,
- bool &HasDupPostfix,
- bool &IsSpecialVCvt,
- size_t &TBNumber) {
- // All of our instruction name fields from arm_neon.td are of the form
- // <instructionname>_...
- // Thus we grab our instruction name via computation of said Prefix.
- const size_t PrefixEnd = Name.find_first_of('_');
- // If InstName is passed in, we use that instead of our name Prefix.
- Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
-
- const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
-
- HasNPostfix = Postfix.count("_n");
- HasLanePostfix = Postfix.count("_lane");
- HasDupPostfix = Postfix.count("_dup");
- IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
-
- if (InstName.compare("vtbl") == 0 ||
- InstName.compare("vtbx") == 0) {
- // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
- // encoding to get its true value.
- TBNumber = Name[Name.size()-1] - 48;
- }
-}
-
-/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
-/// extracted, generate a FileCheck pattern for a Load Or Store
-static void
-GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
- const std::string& OutTypeCode,
- const bool &IsQuad,
- const bool &HasDupPostfix,
- const bool &HasLanePostfix,
- const size_t Count,
- std::string &RegisterSuffix) {
- const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
- // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
- // will output a series of v{ld,st}1s, so we have to handle it specially.
- if ((Count == 3 || Count == 4) && IsQuad) {
- RegisterSuffix += "{";
- for (size_t i = 0; i < Count; i++) {
- RegisterSuffix += "d{{[0-9]+}}";
- if (HasDupPostfix) {
- RegisterSuffix += "[]";
- }
- if (HasLanePostfix) {
- RegisterSuffix += "[{{[0-9]+}}]";
- }
- if (i < Count-1) {
- RegisterSuffix += ", ";
- }
- }
- RegisterSuffix += "}";
- } else {
-
- // Handle normal loads and stores.
- RegisterSuffix += "{";
- for (size_t i = 0; i < Count; i++) {
- RegisterSuffix += "d{{[0-9]+}}";
- if (HasDupPostfix) {
- RegisterSuffix += "[]";
- }
- if (HasLanePostfix) {
- RegisterSuffix += "[{{[0-9]+}}]";
- }
- if (IsQuad && !HasLanePostfix) {
- RegisterSuffix += ", d{{[0-9]+}}";
- if (HasDupPostfix) {
- RegisterSuffix += "[]";
- }
- }
- if (i < Count-1) {
- RegisterSuffix += ", ";
- }
- }
- RegisterSuffix += "}, [r{{[0-9]+}}";
+ if (LocalCK == ClassI)
+ T.makeSigned();
- // We only include the alignment hint if we have a vld1.*64 or
- // a dup/lane instruction.
- if (IsLDSTOne) {
- if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
- RegisterSuffix += ":" + OutTypeCode;
- }
- }
+ // Constant indices are always just "int".
+ if (hasImmediate() && getImmediateIdx() == I)
+ T.makeInteger(32, true);
- RegisterSuffix += "]";
+ S += T.builtin_str();
}
-}
-
-static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
- const bool &HasNPostfix) {
- return (NameRef.count("vmla") ||
- NameRef.count("vmlal") ||
- NameRef.count("vmlsl") ||
- NameRef.count("vmull") ||
- NameRef.count("vqdmlal") ||
- NameRef.count("vqdmlsl") ||
- NameRef.count("vqdmulh") ||
- NameRef.count("vqdmull") ||
- NameRef.count("vqrdmulh")) && HasNPostfix;
-}
-static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
- const bool &HasLanePostfix) {
- return (NameRef.count("vmla") ||
- NameRef.count("vmls") ||
- NameRef.count("vmlal") ||
- NameRef.count("vmlsl") ||
- (NameRef.count("vmul") && NameRef.size() == 3)||
- NameRef.count("vqdmlal") ||
- NameRef.count("vqdmlsl") ||
- NameRef.count("vqdmulh") ||
- NameRef.count("vqrdmulh")) && HasLanePostfix;
-}
+ // Extra constant integer to hold type class enum for this function, e.g. s8
+ if (LocalCK == ClassB)
+ S += "i";
-static bool IsSpecialLaneMultiply(const StringRef &NameRef,
- const bool &HasLanePostfix,
- const bool &IsQuad) {
- const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
- && IsQuad;
- const bool IsVMull = NameRef.count("mull") && !IsQuad;
- return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
+ return S;
}
-static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
- const std::string &Proto,
- const bool &HasNPostfix,
- const bool &IsQuad,
- const bool &HasLanePostfix,
- const bool &HasDupPostfix,
- std::string &NormedProto) {
- // Handle generic case.
- const StringRef NameRef(Name);
- for (size_t i = 0, end = Proto.size(); i < end; i++) {
- switch (Proto[i]) {
- case 'u':
- case 'f':
- case 'F':
- case 'd':
- case 's':
- case 'x':
- case 't':
- case 'n':
- NormedProto += IsQuad? 'q' : 'd';
- break;
- case 'w':
- case 'k':
- NormedProto += 'q';
- break;
- case 'g':
- case 'j':
- case 'h':
- case 'e':
- NormedProto += 'd';
- break;
- case 'i':
- NormedProto += HasLanePostfix? 'a' : 'i';
- break;
- case 'a':
- if (HasLanePostfix) {
- NormedProto += 'a';
- } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
- NormedProto += IsQuad? 'q' : 'd';
- } else {
- NormedProto += 'i';
- }
- break;
- }
- }
+std::string Intrinsic::getMangledName(bool ForceClassS) {
+ // Check if the prototype has a scalar operand with the type of the vector
+ // elements. If not, bitcasting the args will take care of arg checking.
+ // The actual signedness etc. will be taken care of with special enums.
+ ClassKind LocalCK = CK;
+ if (!protoHasScalar())
+ LocalCK = ClassB;
- // Handle Special Cases.
- const bool IsNotVExt = !NameRef.count("vext");
- const bool IsVPADAL = NameRef.count("vpadal");
- const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
- HasLanePostfix);
- const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
- IsQuad);
-
- if (IsSpecialLaneMul) {
- // If
- NormedProto[2] = NormedProto[3];
- NormedProto.erase(3);
- } else if (NormedProto.size() == 4 &&
- NormedProto[0] == NormedProto[1] &&
- IsNotVExt) {
- // If NormedProto.size() == 4 and the first two proto characters are the
- // same, ignore the first.
- NormedProto = NormedProto.substr(1, 3);
- } else if (Is5OpLaneAccum) {
- // If we have a 5 op lane accumulator operation, we take characters 1,2,4
- std::string tmp = NormedProto.substr(1,2);
- tmp += NormedProto[4];
- NormedProto = tmp;
- } else if (IsVPADAL) {
- // If we have VPADAL, ignore the first character.
- NormedProto = NormedProto.substr(0, 2);
- } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
- // If our instruction is a dup instruction, keep only the first and
- // last characters.
- std::string tmp = "";
- tmp += NormedProto[0];
- tmp += NormedProto[NormedProto.size()-1];
- NormedProto = tmp;
- }
+ return mangleName(Name, ForceClassS ? ClassS : LocalCK);
}
-/// GenerateRegisterCheckPatterns - Given a bunch of data we have
-/// extracted, generate a FileCheck pattern to check that an
-/// instruction's arguments are correct.
-static void GenerateRegisterCheckPattern(const std::string &Name,
- const std::string &Proto,
- const std::string &OutTypeCode,
- const bool &HasNPostfix,
- const bool &IsQuad,
- const bool &HasLanePostfix,
- const bool &HasDupPostfix,
- const size_t &TBNumber,
- std::string &RegisterSuffix) {
+std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) {
+ std::string typeCode = getInstTypeCode(BaseType, LocalCK);
+ std::string S = Name;
- RegisterSuffix = "";
+ if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" ||
+ Name == "vcvt_f64_f32")
+ return Name;
- const StringRef NameRef(Name);
-
- if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
- return;
+ if (typeCode.size() > 0) {
+ // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
+ if (Name.size() >= 3 && isdigit(Name.back()) &&
+ Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
+ S.insert(S.length() - 3, "_" + typeCode);
+ else
+ S += "_" + typeCode;
}
- const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
- const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
-
- if (IsLoadStore) {
- // Grab N value from v{ld,st}N using its ascii representation.
- const size_t Count = NameRef[3] - 48;
-
- GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
- HasDupPostfix, HasLanePostfix,
- Count, RegisterSuffix);
- } else if (IsTBXOrTBL) {
- RegisterSuffix += "d{{[0-9]+}}, {";
- for (size_t i = 0; i < TBNumber-1; i++) {
- RegisterSuffix += "d{{[0-9]+}}, ";
- }
- RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
- } else {
- // Handle a normal instruction.
- if (NameRef.count("vget") || NameRef.count("vset"))
- return;
-
- // We first normalize our proto, since we only need to emit 4
- // different types of checks, yet have more than 4 proto types
- // that map onto those 4 patterns.
- std::string NormalizedProto("");
- NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
- HasLanePostfix, HasDupPostfix,
- NormalizedProto);
-
- for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
- const char &c = NormalizedProto[i];
- switch (c) {
- case 'q':
- RegisterSuffix += "q{{[0-9]+}}, ";
- break;
-
- case 'd':
- RegisterSuffix += "d{{[0-9]+}}, ";
- break;
+ if (BaseType != InBaseType) {
+ // A reinterpret - write out the input base type at the end.
+ S += "_" + getInstTypeCode(InBaseType, LocalCK);
+ }
- case 'i':
- RegisterSuffix += "#{{[0-9]+}}, ";
- break;
+ if (LocalCK == ClassB)
+ S += "_v";
- case 'a':
- RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
- break;
- }
+ // Insert a 'q' before the first '_' character so that it ends up before
+ // _lane or _n on vector-scalar operations.
+ if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
+ size_t Pos = S.find('_');
+ S.insert(Pos, "q");
+ }
+
+ char Suffix = '\0';
+ if (BaseType.isScalarForMangling()) {
+ switch (BaseType.getElementSizeInBits()) {
+ case 8: Suffix = 'b'; break;
+ case 16: Suffix = 'h'; break;
+ case 32: Suffix = 's'; break;
+ case 64: Suffix = 'd'; break;
+ default: assert(0 && "Bad suffix!");
}
-
- // Remove extra ", ".
- RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
}
-}
-
-/// GenerateChecksForIntrinsic - Given a specific instruction name +
-/// typestr + class kind, generate the proper set of FileCheck
-/// Patterns to check for. We could just return a string, but instead
-/// use a vector since it provides us with the extra flexibility of
-/// emitting multiple checks, which comes in handy for certain cases
-/// like mla where we want to check for 2 different instructions.
-static void GenerateChecksForIntrinsic(const std::string &Name,
- const std::string &Proto,
- StringRef &OutTypeStr,
- StringRef &InTypeStr,
- ClassKind Ck,
- const std::string &InstName,
- bool IsHiddenLOp,
- std::vector<std::string>& Result) {
-
- // If Ck is a ClassNoTest instruction, just return so no test is
- // emitted.
- if(Ck == ClassNoTest)
- return;
-
- if (Name == "vcvt_f32_f16") {
- Result.push_back("vcvt.f32.f16");
- return;
+ if (Suffix != '\0') {
+ size_t Pos = S.find('_');
+ S.insert(Pos, &Suffix, 1);
}
+ return S;
+}
- // Now we preprocess our instruction given the data we have to get the
- // data that we need.
- // Create a StringRef for String Manipulation of our Name.
- const StringRef NameRef(Name);
- // Instruction Prefix.
- std::string Prefix;
- // The type code for our out type string.
- std::string OutTypeCode;
- // To handle our different cases, we need to check for different postfixes.
- // Is our instruction a quad instruction.
- bool IsQuad = false;
- // Our instruction is of the form <instructionname>_n.
- bool HasNPostfix = false;
- // Our instruction is of the form <instructionname>_lane.
- bool HasLanePostfix = false;
- // Our instruction is of the form <instructionname>_dup.
- bool HasDupPostfix = false;
- // Our instruction is a vcvt instruction which requires special handling.
- bool IsSpecialVCvt = false;
- // If we have a vtbxN or vtblN instruction, this is set to N.
- size_t TBNumber = -1;
- // Register Suffix
- std::string RegisterSuffix;
-
- PreprocessInstruction(NameRef, InstName, Prefix,
- HasNPostfix, HasLanePostfix, HasDupPostfix,
- IsSpecialVCvt, TBNumber);
-
- InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
- GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
- HasLanePostfix, HasDupPostfix, TBNumber,
- RegisterSuffix);
-
- // In the following section, we handle a bunch of special cases. You can tell
- // a special case by the fact we are returning early.
-
- // If our instruction is a logical instruction without postfix or a
- // hidden LOp just return the current Prefix.
- if (Ck == ClassL || IsHiddenLOp) {
- Result.push_back(Prefix + " " + RegisterSuffix);
- return;
- }
+std::string Intrinsic::replaceParamsIn(std::string S) {
+ while (S.find('$') != std::string::npos) {
+ size_t Pos = S.find('$');
+ size_t End = Pos + 1;
+ while (isalpha(S[End]))
+ ++End;
- // If we have a vmov, due to the many different cases, some of which
- // vary within the different intrinsics generated for a single
- // instruction type, just output a vmov. (e.g. given an instruction
- // A, A.u32 might be vmov and A.u8 might be vmov.8).
- //
- // FIXME: Maybe something can be done about this. The two cases that we care
- // about are vmov as an LType and vmov as a WType.
- if (Prefix == "vmov") {
- Result.push_back(Prefix + " " + RegisterSuffix);
- return;
+ std::string VarName = S.substr(Pos + 1, End - Pos - 1);
+ assert_with_loc(Variables.find(VarName) != Variables.end(),
+ "Variable not defined!");
+ S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
}
- // In the following section, we handle special cases.
+ return S;
+}
- if (OutTypeCode == "64") {
- // If we have a 64 bit vdup/vext and are handling an uint64x1_t
- // type, the intrinsic will be optimized away, so just return
- // nothing. On the other hand if we are handling an uint64x2_t
- // (i.e. quad instruction), vdup/vmov instructions should be
- // emitted.
- if (Prefix == "vdup" || Prefix == "vext") {
- if (IsQuad) {
- Result.push_back("{{vmov|vdup}}");
- }
- return;
- }
+void Intrinsic::initVariables() {
+ Variables.clear();
- // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
- // multiple register operands.
- bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
- || Prefix == "vld4";
- bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
- || Prefix == "vst4";
- if (MultiLoadPrefix || MultiStorePrefix) {
- Result.push_back(NameRef.slice(0, 3).str() + "1.64");
- return;
- }
+ // Modify the TypeSpec per-argument to get a concrete Type, and create
+ // known variables for each.
+ for (unsigned I = 1; I < Proto.size(); ++I) {
+ char NameC = '0' + (I - 1);
+ std::string Name = "p";
+ Name.push_back(NameC);
- // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
- // emitting said instructions. So return a check for
- // vldr/vstr/vmov/str instead.
- if (HasLanePostfix || HasDupPostfix) {
- if (Prefix == "vst1") {
- Result.push_back("{{str|vstr|vmov}}");
- return;
- } else if (Prefix == "vld1") {
- Result.push_back("{{ldr|vldr|vmov}}");
- return;
- }
- }
+ Variables[Name] = Variable(Types[I], Name + VariablePostfix);
}
+ RetVar = Variable(Types[0], "ret" + VariablePostfix);
+}
- // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
- // sometimes disassembled as vtrn.32. We use a regex to handle both
- // cases.
- if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
- Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
- return;
- }
+void Intrinsic::emitPrototype() {
+ if (UseMacro)
+ OS << "#define ";
+ else
+ OS << "__ai " << Types[0].str() << " ";
- // Currently on most ARM processors, we do not use vmla/vmls for
- // quad floating point operations. Instead we output vmul + vadd. So
- // check if we have one of those instructions and just output a
- // check for vmul.
- if (OutTypeCode == "f32") {
- if (Prefix == "vmls") {
- Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
- Result.push_back("vsub." + OutTypeCode);
- return;
- } else if (Prefix == "vmla") {
- Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
- Result.push_back("vadd." + OutTypeCode);
- return;
- }
- }
+ OS << mangleName(Name, ClassS) << "(";
- // If we have vcvt, get the input type from the instruction name
- // (which should be of the form instname_inputtype) and append it
- // before the output type.
- if (Prefix == "vcvt") {
- const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
- Prefix += "." + inTypeCode;
- }
+ for (unsigned I = 0; I < getNumParams(); ++I) {
+ if (I != 0)
+ OS << ", ";
- // Append output type code to get our final mangled instruction.
- Prefix += "." + OutTypeCode;
+ char NameC = '0' + I;
+ std::string Name = "p";
+ Name.push_back(NameC);
+ assert(Variables.find(Name) != Variables.end());
+ Variable &V = Variables[Name];
- Result.push_back(Prefix + " " + RegisterSuffix);
-}
+ if (!UseMacro)
+ OS << V.getType().str() << " ";
+ OS << V.getName();
+ }
-/// UseMacro - Examine the prototype string to determine if the intrinsic
-/// should be defined as a preprocessor macro instead of an inline function.
-static bool UseMacro(const std::string &proto, StringRef typestr) {
- // If this builtin takes an immediate argument, we need to #define it rather
- // than use a standard declaration, so that SemaChecking can range check
- // the immediate passed by the user.
- if (proto.find('i') != std::string::npos)
- return true;
-
- // Pointer arguments need to use macros to avoid hiding aligned attributes
- // from the pointer type.
- if (proto.find('p') != std::string::npos ||
- proto.find('c') != std::string::npos)
- return true;
-
- // It is not permitted to pass or return an __fp16 by value, so intrinsics
- // taking a scalar float16_t must be implemented as macros.
- if (typestr.find('h') != std::string::npos &&
- proto.find('s') != std::string::npos)
- return true;
-
- return false;
+ OS << ")";
}
-/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
-/// defined as a macro should be accessed directly instead of being first
-/// assigned to a local temporary.
-static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
- // True for constant ints (i), pointers (p) and const pointers (c).
- return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c');
+void Intrinsic::emitOpeningBrace() {
+ if (UseMacro)
+ OS << " __extension__ ({";
+ else
+ OS << " {";
+ emitNewLine();
}
-// Generate the string "(argtype a, argtype b, ...)"
-static std::string GenArgs(const std::string &proto, StringRef typestr,
- const std::string &name) {
- bool define = UseMacro(proto, typestr);
- char arg = 'a';
-
- std::string s;
- s += "(";
-
- for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
- if (define) {
- // Some macro arguments are used directly instead of being assigned
- // to local temporaries; prepend an underscore prefix to make their
- // names consistent with the local temporaries.
- if (MacroArgUsedDirectly(proto, i))
- s += "__";
- } else {
- s += TypeString(proto[i], typestr) + " __";
- }
- s.push_back(arg);
- if ((i + 1) < e)
- s += ", ";
- }
+void Intrinsic::emitClosingBrace() {
+ if (UseMacro)
+ OS << "})";
+ else
+ OS << "}";
+}
- s += ")";
- return s;
+void Intrinsic::emitNewLine() {
+ if (UseMacro)
+ OS << " \\\n";
+ else
+ OS << "\n";
}
-// Macro arguments are not type-checked like inline function arguments, so
-// assign them to local temporaries to get the right type checking.
-static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
- const std::string &name ) {
- char arg = 'a';
- std::string s;
- bool generatedLocal = false;
+void Intrinsic::emitShadowedArgs() {
+ // Macro arguments are not type-checked like inline function arguments,
+ // so assign them to local temporaries to get the right type checking.
+ if (!UseMacro)
+ return;
- for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
+ for (unsigned I = 0; I < getNumParams(); ++I) {
// Do not create a temporary for an immediate argument.
// That would defeat the whole point of using a macro!
- if (MacroArgUsedDirectly(proto, i))
+ if (hasImmediate() && Proto[I+1] == 'i')
+ continue;
+ // Do not create a temporary for pointer arguments. The input
+ // pointer may have an alignment hint.
+ if (getParamType(I).isPointer())
continue;
- generatedLocal = true;
- s += TypeString(proto[i], typestr) + " __";
- s.push_back(arg);
- s += " = (";
- s.push_back(arg);
- s += "); ";
- }
-
- if (generatedLocal)
- s += "\\\n ";
- return s;
-}
-
-// Use the vmovl builtin to sign-extend or zero-extend a vector.
-static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
- std::string s, high;
- high = h ? "_high" : "";
- s = MangleName("vmovl" + high, typestr, ClassS);
- s += "(" + a + ")";
- return s;
-}
-
-// Get the high 64-bit part of a vector
-static std::string GetHigh(const std::string &a, StringRef typestr) {
- std::string s;
- s = MangleName("vget_high", typestr, ClassS);
- s += "(" + a + ")";
- return s;
-}
-// Gen operation with two operands and get high 64-bit for both of two operands.
-static std::string Gen2OpWith2High(StringRef typestr,
- const std::string &op,
- const std::string &a,
- const std::string &b) {
- std::string s;
- std::string Op1 = GetHigh(a, typestr);
- std::string Op2 = GetHigh(b, typestr);
- s = MangleName(op, typestr, ClassS);
- s += "(" + Op1 + ", " + Op2 + ");";
- return s;
-}
+ char NameC = '0' + I;
+ std::string Name = "p";
+ Name.push_back(NameC);
-// Gen operation with three operands and get high 64-bit of the latter
-// two operands.
-static std::string Gen3OpWith2High(StringRef typestr,
- const std::string &op,
- const std::string &a,
- const std::string &b,
- const std::string &c) {
- std::string s;
- std::string Op1 = GetHigh(b, typestr);
- std::string Op2 = GetHigh(c, typestr);
- s = MangleName(op, typestr, ClassS);
- s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
- return s;
-}
+ assert(Variables.find(Name) != Variables.end());
+ Variable &V = Variables[Name];
-// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
-static std::string GenCombine(std::string typestr,
- const std::string &a,
- const std::string &b) {
- std::string s;
- s = MangleName("vcombine", typestr, ClassS);
- s += "(" + a + ", " + b + ")";
- return s;
-}
+ std::string NewName = "s" + utostr(I);
+ Variable V2(V.getType(), NewName + VariablePostfix);
-static std::string Duplicate(unsigned nElts, StringRef typestr,
- const std::string &a) {
- std::string s;
+ OS << " " << V2.getType().str() << " " << V2.getName() << " = "
+ << V.getName() << ";";
+ emitNewLine();
- s = "(" + TypeString('d', typestr) + "){ ";
- for (unsigned i = 0; i != nElts; ++i) {
- s += a;
- if ((i + 1) < nElts)
- s += ", ";
+ V = V2;
}
- s += " }";
-
- return s;
}
-static std::string SplatLane(unsigned nElts, const std::string &vec,
- const std::string &lane) {
- std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
- for (unsigned i = 0; i < nElts; ++i)
- s += ", " + lane;
- s += ")";
- return s;
+// We don't check 'a' in this function, because for builtin functions the
+// argument matching 'a' uses a vector type splatted from a scalar type.
+bool Intrinsic::protoHasScalar() {
+ return (Proto.find('s') != std::string::npos ||
+ Proto.find('z') != std::string::npos ||
+ Proto.find('r') != std::string::npos ||
+ Proto.find('b') != std::string::npos ||
+ Proto.find('$') != std::string::npos ||
+ Proto.find('y') != std::string::npos ||
+ Proto.find('o') != std::string::npos);
}
-static std::string RemoveHigh(const std::string &name) {
- std::string s = name;
- std::size_t found = s.find("_high_");
- if (found == std::string::npos)
- PrintFatalError("name should contain \"_high_\" for high intrinsics");
- s.replace(found, 5, "");
- return s;
-}
+void Intrinsic::emitBodyAsBuiltinCall() {
+ std::string S;
-static unsigned GetNumElements(StringRef typestr, bool &quad) {
- quad = false;
- bool dummy = false;
- char type = ClassifyType(typestr, quad, dummy, dummy);
- unsigned nElts = 0;
- switch (type) {
- case 'c': nElts = 8; break;
- case 's': nElts = 4; break;
- case 'i': nElts = 2; break;
- case 'l': nElts = 1; break;
- case 'k': nElts = 1; break;
- case 'h': nElts = 4; break;
- case 'f': nElts = 2; break;
- case 'd':
- nElts = 1;
- break;
- default:
- PrintFatalError("unhandled type!");
- }
- if (quad) nElts <<= 1;
- return nElts;
-}
+  // If this builtin returns a struct of 2, 3, or 4 vectors, pass it as an
+  // implicit sret-like argument.
+ bool SRet = getReturnType().getNumVectors() >= 2;
-// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
-//
-// Note that some intrinsic definitions around 'lane' are being implemented
-// with macros, because they all contain constant integer argument, and we
-// statically check the range of the lane index to meet the semantic
-// requirement of different intrinsics.
-//
-// For the intrinsics implemented with macro, if they contain another intrinsic
-// implemented with maco, we have to avoid using the same argument names for
-// the nested instrinsics. For example, macro vfms_lane is being implemented
-// with another macor vfma_lane, so we rename all arguments for vfms_lane by
-// adding a suffix '1'.
-
-static std::string GenOpString(const std::string &name, OpKind op,
- const std::string &proto, StringRef typestr) {
- bool quad;
- unsigned nElts = GetNumElements(typestr, quad);
- bool define = UseMacro(proto, typestr);
-
- std::string ts = TypeString(proto[0], typestr);
- std::string s;
- if (!define) {
- s = "return ";
- }
-
- switch(op) {
- case OpAdd:
- s += "__a + __b;";
- break;
- case OpAddl:
- s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
- break;
- case OpAddlHi:
- s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
- break;
- case OpAddw:
- s += "__a + " + Extend(typestr, "__b") + ";";
- break;
- case OpAddwHi:
- s += "__a + " + Extend(typestr, "__b", 1) + ";";
- break;
- case OpSub:
- s += "__a - __b;";
- break;
- case OpSubl:
- s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
- break;
- case OpSublHi:
- s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
- break;
- case OpSubw:
- s += "__a - " + Extend(typestr, "__b") + ";";
- break;
- case OpSubwHi:
- s += "__a - " + Extend(typestr, "__b", 1) + ";";
- break;
- case OpMulN:
- s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
- break;
- case OpMulLane:
- s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
- break;
- case OpMulXLane:
- s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
- SplatLane(nElts, "__b", "__c") + ");";
- break;
- case OpMul:
- s += "__a * __b;";
- break;
- case OpFMlaN:
- s += MangleName("vfma", typestr, ClassS);
- s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
- break;
- case OpFMlsN:
- s += MangleName("vfms", typestr, ClassS);
- s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
- break;
- case OpMullLane:
- s += MangleName("vmull", typestr, ClassS) + "(__a, " +
- SplatLane(nElts, "__b", "__c") + ");";
- break;
- case OpMullHiLane:
- s += MangleName("vmull", typestr, ClassS) + "(" +
- GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
- break;
- case OpMlaN:
- s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
- break;
- case OpMlaLane:
- s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpMla:
- s += "__a + (__b * __c);";
- break;
- case OpMlalN:
- s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
- Duplicate(nElts, typestr, "__c") + ");";
- break;
- case OpMlalLane:
- s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
- SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpMlalHiLane:
- s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
- GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpMlal:
- s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
- break;
- case OpMullHi:
- s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
- break;
- case OpMullHiP64: {
- std::string Op1 = GetHigh("__a", typestr);
- std::string Op2 = GetHigh("__b", typestr);
- s += MangleName("vmull", typestr, ClassS);
- s += "((poly64_t)" + Op1 + ", (poly64_t)" + Op2 + ");";
- break;
- }
- case OpMullHiN:
- s += MangleName("vmull_n", typestr, ClassS);
- s += "(" + GetHigh("__a", typestr) + ", __b);";
- return s;
- case OpMlalHi:
- s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
- break;
- case OpMlalHiN:
- s += MangleName("vmlal_n", typestr, ClassS);
- s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
- return s;
- case OpMlsN:
- s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
- break;
- case OpMlsLane:
- s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpFMSLane:
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
- s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
- break;
- case OpFMSLaneQ:
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
- s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
- break;
- case OpMls:
- s += "__a - (__b * __c);";
- break;
- case OpMlslN:
- s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
- Duplicate(nElts, typestr, "__c") + ");";
- break;
- case OpMlslLane:
- s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
- SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpMlslHiLane:
- s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
- GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpMlsl:
- s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
- break;
- case OpMlslHi:
- s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
- break;
- case OpMlslHiN:
- s += MangleName("vmlsl_n", typestr, ClassS);
- s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
- break;
- case OpQDMullLane:
- s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
- SplatLane(nElts, "__b", "__c") + ");";
- break;
- case OpQDMullHiLane:
- s += MangleName("vqdmull", typestr, ClassS) + "(" +
- GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
- break;
- case OpQDMlalLane:
- s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
- SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpQDMlalHiLane:
- s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
- GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpQDMlslLane:
- s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
- SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpQDMlslHiLane:
- s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
- GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
- break;
- case OpQDMulhLane:
- s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
- SplatLane(nElts, "__b", "__c") + ");";
- break;
- case OpQRDMulhLane:
- s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
- SplatLane(nElts, "__b", "__c") + ");";
- break;
- case OpEq:
- s += "(" + ts + ")(__a == __b);";
- break;
- case OpGe:
- s += "(" + ts + ")(__a >= __b);";
- break;
- case OpLe:
- s += "(" + ts + ")(__a <= __b);";
- break;
- case OpGt:
- s += "(" + ts + ")(__a > __b);";
- break;
- case OpLt:
- s += "(" + ts + ")(__a < __b);";
- break;
- case OpNeg:
- s += " -__a;";
- break;
- case OpNot:
- s += " ~__a;";
- break;
- case OpAnd:
- s += "__a & __b;";
- break;
- case OpOr:
- s += "__a | __b;";
- break;
- case OpXor:
- s += "__a ^ __b;";
- break;
- case OpAndNot:
- s += "__a & ~__b;";
- break;
- case OpOrNot:
- s += "__a | ~__b;";
- break;
- case OpCast:
- s += "(" + ts + ")__a;";
- break;
- case OpConcat:
- s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
- s += ", (int64x1_t)__b, 0, 1);";
- break;
- case OpHi:
- // nElts is for the result vector, so the source is twice that number.
- s += "__builtin_shufflevector(__a, __a";
- for (unsigned i = nElts; i < nElts * 2; ++i)
- s += ", " + utostr(i);
- s+= ");";
- break;
- case OpLo:
- s += "__builtin_shufflevector(__a, __a";
- for (unsigned i = 0; i < nElts; ++i)
- s += ", " + utostr(i);
- s+= ");";
- break;
- case OpDup:
- s += Duplicate(nElts, typestr, "__a") + ";";
- break;
- case OpDupLane:
- s += SplatLane(nElts, "__a", "__b") + ";";
- break;
- case OpSelect:
- // ((0 & 1) | (~0 & 2))
- s += "(" + ts + ")";
- ts = TypeString(proto[1], typestr);
- s += "((__a & (" + ts + ")__b) | ";
- s += "(~__a & (" + ts + ")__c));";
- break;
- case OpRev16:
- s += "__builtin_shufflevector(__a, __a";
- for (unsigned i = 2; i <= nElts; i += 2)
- for (unsigned j = 0; j != 2; ++j)
- s += ", " + utostr(i - j - 1);
- s += ");";
- break;
- case OpRev32: {
- unsigned WordElts = nElts >> (1 + (int)quad);
- s += "__builtin_shufflevector(__a, __a";
- for (unsigned i = WordElts; i <= nElts; i += WordElts)
- for (unsigned j = 0; j != WordElts; ++j)
- s += ", " + utostr(i - j - 1);
- s += ");";
- break;
- }
- case OpRev64: {
- unsigned DblWordElts = nElts >> (int)quad;
- s += "__builtin_shufflevector(__a, __a";
- for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
- for (unsigned j = 0; j != DblWordElts; ++j)
- s += ", " + utostr(i - j - 1);
- s += ");";
- break;
- }
- case OpXtnHi: {
- s = TypeString(proto[1], typestr) + " __a1 = " +
- MangleName("vmovn", typestr, ClassS) + "(__b);\n " +
- "return __builtin_shufflevector(__a, __a1";
- for (unsigned i = 0; i < nElts * 4; ++i)
- s += ", " + utostr(i);
- s += ");";
- break;
- }
- case OpSqxtunHi: {
- s = TypeString(proto[1], typestr) + " __a1 = " +
- MangleName("vqmovun", typestr, ClassS) + "(__b);\n " +
- "return __builtin_shufflevector(__a, __a1";
- for (unsigned i = 0; i < nElts * 4; ++i)
- s += ", " + utostr(i);
- s += ");";
- break;
- }
- case OpQxtnHi: {
- s = TypeString(proto[1], typestr) + " __a1 = " +
- MangleName("vqmovn", typestr, ClassS) + "(__b);\n " +
- "return __builtin_shufflevector(__a, __a1";
- for (unsigned i = 0; i < nElts * 4; ++i)
- s += ", " + utostr(i);
- s += ");";
- break;
- }
- case OpFcvtnHi: {
- std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
- s = TypeString(proto[1], typestr) + " __a1 = " +
- MangleName(FName, typestr, ClassS) + "(__b);\n " +
- "return __builtin_shufflevector(__a, __a1";
- for (unsigned i = 0; i < nElts * 4; ++i)
- s += ", " + utostr(i);
- s += ");";
- break;
- }
- case OpFcvtlHi: {
- std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
- s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
- ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);";
- break;
- }
- case OpFcvtxnHi: {
- s = TypeString(proto[1], typestr) + " __a1 = " +
- MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " +
- "return __builtin_shufflevector(__a, __a1";
- for (unsigned i = 0; i < nElts * 4; ++i)
- s += ", " + utostr(i);
- s += ");";
- break;
- }
- case OpUzp1:
- s += "__builtin_shufflevector(__a, __b";
- for (unsigned i = 0; i < nElts; i++)
- s += ", " + utostr(2*i);
- s += ");";
- break;
- case OpUzp2:
- s += "__builtin_shufflevector(__a, __b";
- for (unsigned i = 0; i < nElts; i++)
- s += ", " + utostr(2*i+1);
- s += ");";
- break;
- case OpZip1:
- s += "__builtin_shufflevector(__a, __b";
- for (unsigned i = 0; i < (nElts/2); i++)
- s += ", " + utostr(i) + ", " + utostr(i+nElts);
- s += ");";
- break;
- case OpZip2:
- s += "__builtin_shufflevector(__a, __b";
- for (unsigned i = nElts/2; i < nElts; i++)
- s += ", " + utostr(i) + ", " + utostr(i+nElts);
- s += ");";
- break;
- case OpTrn1:
- s += "__builtin_shufflevector(__a, __b";
- for (unsigned i = 0; i < (nElts/2); i++)
- s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
- s += ");";
- break;
- case OpTrn2:
- s += "__builtin_shufflevector(__a, __b";
- for (unsigned i = 0; i < (nElts/2); i++)
- s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
- s += ");";
- break;
- case OpAbdl: {
- std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
- if (typestr[0] != 'U') {
- // vabd results are always unsigned and must be zero-extended.
- std::string utype = "U" + typestr.str();
- s += "(" + TypeString(proto[0], typestr) + ")";
- abd = "(" + TypeString('d', utype) + ")" + abd;
- s += Extend(utype, abd) + ";";
- } else {
- s += Extend(typestr, abd) + ";";
- }
- break;
- }
- case OpAbdlHi:
- s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
- break;
- case OpAddhnHi: {
- std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
- s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
- s += ";";
- break;
- }
- case OpRAddhnHi: {
- std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
- s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
- s += ";";
- break;
- }
- case OpSubhnHi: {
- std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
- s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
- s += ";";
- break;
- }
- case OpRSubhnHi: {
- std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
- s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
- s += ";";
- break;
- }
- case OpAba:
- s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
- break;
- case OpAbal:
- s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
- break;
- case OpAbalHi:
- s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
- break;
- case OpQDMullHi:
- s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
- break;
- case OpQDMullHiN:
- s += MangleName("vqdmull_n", typestr, ClassS);
- s += "(" + GetHigh("__a", typestr) + ", __b);";
- return s;
- case OpQDMlalHi:
- s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
- break;
- case OpQDMlalHiN:
- s += MangleName("vqdmlal_n", typestr, ClassS);
- s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
- return s;
- case OpQDMlslHi:
- s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
- break;
- case OpQDMlslHiN:
- s += MangleName("vqdmlsl_n", typestr, ClassS);
- s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
- return s;
- case OpDiv:
- s += "__a / __b;";
- break;
- case OpMovlHi: {
- s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
- MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s;
- s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
- s += "(__a1, 0);";
- break;
- }
- case OpLongHi: {
- // Another local variable __a1 is needed for calling a Macro,
- // or using __a will have naming conflict when Macro expanding.
- s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
- MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
- s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
- "(__a1, __b);";
- break;
- }
- case OpNarrowHi: {
- s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
- MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
- break;
- }
- case OpCopyLane: {
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
- s += TypeString('s', typestr) + " __c2 = " +
- MangleName("vget_lane", typestr, ClassS) + "(__c1, __d); \\\n " +
- MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b);";
- break;
- }
- case OpCopyQLane: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
- s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
- "(__c1, __d); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b);";
- break;
- }
- case OpCopyLaneQ: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
- s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
- "(__c1, __d); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b);";
- break;
- }
- case OpScalarMulLane: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
- "(__b, __c);\\\n __a * __d1;";
- break;
- }
- case OpScalarMulLaneQ: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
- "(__b1, __c);\\\n __a1 * __d1;";
- break;
- }
- case OpScalarMulXLane: {
- bool dummy = false;
- char type = ClassifyType(typestr, dummy, dummy, dummy);
- if (type == 'f') type = 's';
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
- "(__b1, __c);\\\n vmulx" + type + "_" +
- typeCode + "(__a1, __d1);";
- break;
- }
- case OpScalarMulXLaneQ: {
- bool dummy = false;
- char type = ClassifyType(typestr, dummy, dummy, dummy);
- if (type == 'f') type = 's';
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
- typeCode + "(__b1, __c);\\\n vmulx" + type +
- "_" + typeCode + "(__a1, __d1);";
- break;
+ StringRef N = Name;
+ if (hasSplat()) {
+ // Call the non-splat builtin: chop off the "_n" suffix from the name.
+ assert(N.endswith("_n"));
+ N = N.drop_back(2);
}
- case OpScalarVMulXLane: {
- bool dummy = false;
- char type = ClassifyType(typestr, dummy, dummy, dummy);
- if (type == 'f') type = 's';
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += TypeString('s', typestr) + " __d1 = vget_lane_" +
- typeCode + "(__a1, 0);\\\n" +
- " " + TypeString('s', typestr) + " __e1 = vget_lane_" +
- typeCode + "(__b1, __c);\\\n" +
- " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
- typeCode + "(__d1, __e1);\\\n" +
- " " + TypeString('d', typestr) + " __g1;\\\n" +
- " vset_lane_" + typeCode + "(__f1, __g1, __c);";
- break;
- }
+ ClassKind LocalCK = CK;
+ if (!protoHasScalar())
+ LocalCK = ClassB;
- case OpScalarVMulXLaneQ: {
- bool dummy = false;
- char type = ClassifyType(typestr, dummy, dummy, dummy);
- if (type == 'f') type = 's';
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += TypeString('s', typestr) + " __d1 = vget_lane_" +
- typeCode + "(__a1, 0);\\\n" +
- " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
- typeCode + "(__b1, __c);\\\n" +
- " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
- typeCode + "(__d1, __e1);\\\n" +
- " " + TypeString('d', typestr) + " __g1;\\\n" +
- " vset_lane_" + typeCode + "(__f1, __g1, 0);";
- break;
- }
- case OpScalarQDMullLane: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
- "vget_lane_" + typeCode + "(__b1, __c));";
- break;
- }
- case OpScalarQDMullLaneQ: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
- "vgetq_lane_" + typeCode + "(__b1, __c));";
- break;
- }
- case OpScalarQDMulHiLane: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
- "vget_lane_" + typeCode + "(__b1, __c));";
- break;
- }
- case OpScalarQDMulHiLaneQ: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
- "vgetq_lane_" + typeCode + "(__b1, __c));";
- break;
- }
- case OpScalarQRDMulHiLane: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
- "vget_lane_" + typeCode + "(__b1, __c));";
- break;
- }
- case OpScalarQRDMulHiLaneQ: {
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
- s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
- "vgetq_lane_" + typeCode + "(__b1, __c));";
- break;
- }
- case OpScalarGetLane:{
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
+ if (!getReturnType().isVoid() && !SRet)
+ S += "(" + RetVar.getType().str() + ") ";
- std::string intType = quad ? "int16x8_t" : "int16x4_t";
- std::string intName = quad ? "vgetq" : "vget";
+ S += "__builtin_neon_" + mangleName(N, LocalCK) + "(";
- // reinterpret float16 vector as int16 vector
- s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n";
+ if (SRet)
+ S += "&" + RetVar.getName() + ", ";
- s += " int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n";
+ for (unsigned I = 0; I < getNumParams(); ++I) {
+ Variable &V = Variables["p" + utostr(I)];
+ Type T = V.getType();
- // reinterpret int16 vector as float16 vector
- s += " float16_t __a4 = *(float16_t *)(&__a3);\\\n";
- s += " __a4;";
- break;
- }
- case OpScalarSetLane:{
- std::string typeCode = "";
- InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n ";
+ // Handle multiple-vector values specially, emitting each subvector as an
+ // argument to the builtin.
+ if (T.getNumVectors() > 1) {
+ // Check if an explicit cast is needed.
+ std::string Cast;
+ if (T.isChar() || T.isPoly() || !T.isSigned()) {
+ Type T2 = T;
+ T2.makeOneVector();
+ T2.makeInteger(8, /*Signed=*/true);
+ Cast = "(" + T2.str() + ")";
+ }
- std::string origType = quad ? "float16x8_t" : "float16x4_t";
- std::string intType = quad ? "int16x8_t" : "int16x4_t";
- std::string intName = quad ? "vsetq" : "vset";
+ for (unsigned J = 0; J < T.getNumVectors(); ++J)
+ S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
+ continue;
+ }
- // reinterpret float16_t as int16_t
- s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n";
- // reinterpret float16 vector as int16 vector
- s += " " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n";
+ std::string Arg;
+ Type CastToType = T;
+ if (hasSplat() && I == getSplatIdx()) {
+ Arg = "(" + BaseType.str() + ") {";
+ for (unsigned J = 0; J < BaseType.getNumElements(); ++J) {
+ if (J != 0)
+ Arg += ", ";
+ Arg += V.getName();
+ }
+ Arg += "}";
- s += " " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n";
+ CastToType = BaseType;
+ } else {
+ Arg = V.getName();
+ }
- // reinterpret int16 vector as float16 vector
- s += " " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n";
- s += "__b4;";
- break;
+ // Check if an explicit cast is needed.
+ if (CastToType.isVector()) {
+ CastToType.makeInteger(8, true);
+ Arg = "(" + CastToType.str() + ")" + Arg;
+ }
+
+ S += Arg + ", ";
}
- default:
- PrintFatalError("unknown OpKind!");
+ // Extra constant integer to hold type class enum for this function, e.g. s8
+ if (getClassKind(true) == ClassB) {
+ Type ThisTy = getReturnType();
+ if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F')
+ ThisTy = getParamType(0);
+ if (ThisTy.isPointer())
+ ThisTy = getParamType(1);
+
+ S += utostr(ThisTy.getNeonEnum());
+ } else {
+ // Remove extraneous ", ".
+ S.pop_back();
+ S.pop_back();
}
- return s;
+ S += ");";
+
+ std::string RetExpr;
+ if (!SRet && !RetVar.getType().isVoid())
+ RetExpr = RetVar.getName() + " = ";
+
+ OS << " " << RetExpr << S;
+ emitNewLine();
}
-static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
- unsigned mod = proto[0];
+void Intrinsic::emitBody() {
+ std::vector<std::string> Lines;
- if (mod == 'v' || mod == 'f' || mod == 'F')
- mod = proto[1];
+ assert(RetVar.getType() == Types[0]);
+ // Create a return variable, if we're not void.
+ if (!RetVar.getType().isVoid()) {
+ OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
+ emitNewLine();
+ }
- bool quad = false;
- bool poly = false;
- bool usgn = false;
- bool scal = false;
- bool cnst = false;
- bool pntr = false;
+ if (!Body || Body->getValues().size() == 0) {
+ // Nothing specific to output - must output a builtin.
+ emitBodyAsBuiltinCall();
+ return;
+ }
- // Base type to get the type string for.
- char type = ClassifyType(typestr, quad, poly, usgn);
+ // We have a list of "things to output". The last should be returned.
+ for (auto *I : Body->getValues()) {
+ if (StringInit *SI = dyn_cast<StringInit>(I)) {
+ Lines.push_back(replaceParamsIn(SI->getAsString()));
+ } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
+ Lines.push_back(emitDag(DI).second + ";");
+ }
+ }
- // Based on the modifying character, change the type and width if necessary.
- type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
+ assert(Lines.size() && "Empty def?");
+ if (!RetVar.getType().isVoid())
+ Lines.back().insert(0, RetVar.getName() + " = ");
- NeonTypeFlags::EltType ET;
- switch (type) {
- case 'c':
- ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
- break;
- case 's':
- ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
- break;
- case 'i':
- ET = NeonTypeFlags::Int32;
- break;
- case 'l':
- ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
- break;
- case 'k':
- ET = NeonTypeFlags::Poly128;
- break;
- case 'h':
- ET = NeonTypeFlags::Float16;
- break;
- case 'f':
- ET = NeonTypeFlags::Float32;
- break;
- case 'd':
- ET = NeonTypeFlags::Float64;
- break;
- default:
- PrintFatalError("unhandled type!");
+ for (auto &L : Lines) {
+ OS << " " << L;
+ emitNewLine();
}
- NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
- return Flags.getFlags();
}
-// We don't check 'a' in this function, because for builtin function the
-// argument matching to 'a' uses a vector type splatted from a scalar type.
-static bool ProtoHasScalar(const std::string proto)
-{
- return (proto.find('s') != std::string::npos
- || proto.find('z') != std::string::npos
- || proto.find('r') != std::string::npos
- || proto.find('b') != std::string::npos
- || proto.find('$') != std::string::npos
- || proto.find('y') != std::string::npos
- || proto.find('o') != std::string::npos);
+void Intrinsic::emitReturn() {
+ if (RetVar.getType().isVoid())
+ return;
+ if (UseMacro)
+ OS << " " << RetVar.getName() << ";";
+ else
+ OS << " return " << RetVar.getName() << ";";
+ emitNewLine();
}
-// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
-static std::string GenBuiltin(const std::string &name, const std::string &proto,
- StringRef typestr, ClassKind ck) {
- std::string s;
-
- // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
- // sret-like argument.
- bool sret = IsMultiVecProto(proto[0]);
-
- bool define = UseMacro(proto, typestr);
-
- // Check if the prototype has a scalar operand with the type of the vector
- // elements. If not, bitcasting the args will take care of arg checking.
- // The actual signedness etc. will be taken care of with special enums.
- if (!ProtoHasScalar(proto))
- ck = ClassB;
-
- if (proto[0] != 'v') {
- std::string ts = TypeString(proto[0], typestr);
-
- if (define) {
- if (sret)
- s += ts + " r; ";
- else
- s += "(" + ts + ")";
- } else if (sret) {
- s += ts + " r; ";
- } else {
- s += "return (" + ts + ")";
- }
- }
-
- bool splat = proto.find('a') != std::string::npos;
+// Central DAG dispatcher: map the operation def at the head of DI to its
+// dedicated emitter. Returns the resulting value's type plus the C
+// expression/statement text for it.
+std::pair<Type, std::string> Intrinsic::emitDag(DagInit *DI) {
+ // At this point we should only be seeing a def.
+ DefInit *DefI = cast<DefInit>(DI->getOperator());
+ std::string Op = DefI->getAsString();
+
+ if (Op == "cast" || Op == "bitcast")
+ return emitDagCast(DI, Op == "bitcast");
+ if (Op == "shuffle")
+ return emitDagShuffle(DI);
+ if (Op == "dup")
+ return emitDagDup(DI);
+ if (Op == "splat")
+ return emitDagSplat(DI);
+ if (Op == "save_temp")
+ return emitDagSaveTemp(DI);
+ if (Op == "op")
+ return emitDagOp(DI);
+ if (Op == "call")
+ return emitDagCall(DI);
+ if (Op == "name_replace")
+ return emitDagNameReplace(DI);
+ if (Op == "literal")
+ return emitDagLiteral(DI);
+ assert_with_loc(false, "Unknown operation!");
+ return std::make_pair(Type::getVoid(), "");
+}
- s += "__builtin_neon_";
- if (splat) {
- // Call the non-splat builtin: chop off the "_n" suffix from the name.
- std::string vname(name, 0, name.size()-2);
- s += MangleName(vname, typestr, ck);
+// (op "OP", a[, b]) - emit OP as a raw prefix unary or infix binary
+// operator string; the operator itself is not validated.
+std::pair<Type, std::string> Intrinsic::emitDagOp(DagInit *DI) {
+ std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
+ if (DI->getNumArgs() == 2) {
+ // Unary op.
+ std::pair<Type, std::string> R =
+ emitDagArg(DI->getArg(1), DI->getArgName(1));
+ return std::make_pair(R.first, Op + R.second);
 } else {
- s += MangleName(name, typestr, ck);
+ assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
+ std::pair<Type, std::string> R1 =
+ emitDagArg(DI->getArg(1), DI->getArgName(1));
+ std::pair<Type, std::string> R2 =
+ emitDagArg(DI->getArg(2), DI->getArgName(2));
+ // Binary operands must agree exactly; the result keeps that type.
+ assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
+ return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
 }
- s += "(";
-
- // Pass the address of the return variable as the first argument to sret-like
- // builtins.
- if (sret)
- s += "&r, ";
+}
- char arg = 'a';
- for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
- std::string args = std::string(&arg, 1);
+// (call NAME, args...) - invoke another intrinsic. The callee is
+// overload-resolved against the evaluated argument types, marked as a
+// needed-early dependency, and emitted as a call to its mangled name.
+std::pair<Type, std::string> Intrinsic::emitDagCall(DagInit *DI) {
+ std::vector<Type> Types;
+ std::vector<std::string> Values;
+ for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
+ std::pair<Type, std::string> R =
+ emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1));
+ Types.push_back(R.first);
+ Values.push_back(R.second);
+ }
- // Use the local temporaries instead of the macro arguments.
- args = "__" + args;
+ // Look up the called intrinsic.
+ std::string N;
+ if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
+ N = SI->getAsUnquotedString();
+ else
+ N = emitDagArg(DI->getArg(0), "").second;
+ Intrinsic *Callee = Emitter.getIntrinsic(N, Types);
+ assert(Callee && "getIntrinsic should not return us nullptr!");
- bool argQuad = false;
- bool argPoly = false;
- bool argUsgn = false;
- bool argScalar = false;
- bool dummy = false;
- char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
- argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
- dummy, dummy);
+ // Make sure the callee is known as an early def.
+ Callee->setNeededEarly();
+ Dependencies.insert(Callee);
- // Handle multiple-vector values specially, emitting each subvector as an
- // argument to the __builtin.
- unsigned NumOfVec = 0;
- if (proto[i] >= '2' && proto[i] <= '4') {
- NumOfVec = proto[i] - '0';
- } else if (proto[i] >= 'B' && proto[i] <= 'D') {
- NumOfVec = proto[i] - 'A' + 1;
- }
+ // Now create the call itself.
+ std::string S = Callee->getMangledName(true) + "(";
+ for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
+ if (I != 0)
+ S += ", ";
+ S += Values[I];
+ }
+ S += ")";
- if (NumOfVec > 0) {
- // Check if an explicit cast is needed.
- if (argType != 'c' || argPoly || argUsgn)
- args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
+ return std::make_pair(Callee->getReturnType(), S);
+}
- for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
- s += args + ".val[" + utostr(vi) + "]";
- if ((vi + 1) < ve)
- s += ", ";
+// Handles both (cast ...) and (bitcast ...). The type modifiers are folded
+// left-to-right into castToType; the final argument is the value being cast.
+std::pair<Type, std::string> Intrinsic::emitDagCast(DagInit *DI,
+ bool IsBitCast) {
+ // (cast MOD* VAL) -> cast VAL to type given by MOD.
+ std::pair<Type, std::string> R = emitDagArg(
+ DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1));
+ Type castToType = R.first;
+ for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
+
+ // MOD can take several forms:
+ // 1. $X - take the type of parameter / variable X.
+ // 2. The value "R" - take the type of the return type.
+ // 3. a type string
+ // 4. The value "U" or "S" to switch the signedness.
+ // 5. The value "H" or "D" to half or double the bitwidth.
+ // 6. The value "8" to convert to 8-bit (signed) integer lanes.
+ if (DI->getArgName(ArgIdx).size()) {
+ assert_with_loc(Variables.find(DI->getArgName(ArgIdx)) != Variables.end(),
+ "Variable not found");
+ castToType = Variables[DI->getArgName(ArgIdx)].getType();
+ } else {
+ StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
+ assert_with_loc(SI, "Expected string type or $Name for cast type");
+
+ if (SI->getAsUnquotedString() == "R") {
+ castToType = getReturnType();
+ } else if (SI->getAsUnquotedString() == "U") {
+ castToType.makeUnsigned();
+ } else if (SI->getAsUnquotedString() == "S") {
+ castToType.makeSigned();
+ } else if (SI->getAsUnquotedString() == "H") {
+ castToType.halveLanes();
+ } else if (SI->getAsUnquotedString() == "D") {
+ castToType.doubleLanes();
+ } else if (SI->getAsUnquotedString() == "8") {
+ castToType.makeInteger(8, true);
+ } else {
+ castToType = Type::fromTypedefName(SI->getAsUnquotedString());
+ assert_with_loc(!castToType.isVoid(), "Unknown typedef");
 }
- if ((i + 1) < e)
- s += ", ";
-
- continue;
- }
-
- if (splat && (i + 1) == e)
- args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
-
- // Check if an explicit cast is needed.
- if ((splat || !argScalar) &&
- ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
- std::string argTypeStr = "c";
- if (ck != ClassB)
- argTypeStr = argType;
- if (argQuad)
- argTypeStr = "Q" + argTypeStr;
- args = "(" + TypeString('d', argTypeStr) + ")" + args;
 }
-
- s += args;
- if ((i + 1) < e)
- s += ", ";
 }
- // Extra constant integer to hold type class enum for this function, e.g. s8
- if (ck == ClassB)
- s += ", " + utostr(GetNeonEnum(proto, typestr));
+ std::string S;
+ if (IsBitCast) {
+ // Emit a reinterpret cast. The second operand must be an lvalue, so create
+ // a temporary.
+ std::string N = "reint";
+ unsigned I = 0;
+ while (Variables.find(N) != Variables.end())
+ N = "reint" + utostr(++I);
+ Variables[N] = Variable(R.first, N + VariablePostfix);
- s += ");";
+ OS << R.first.str() << " " << Variables[N].getName() << " = " << R.second
+ << ";";
+ emitNewLine();
- if (proto[0] != 'v' && sret) {
- if (define)
- s += " r;";
- else
- s += " return r;";
+ S = "*(" + castToType.str() + " *) &" + Variables[N].getName() + "";
+ } else {
+ // Emit a normal (static) cast.
+ S = "(" + castToType.str() + ")(" + R.second + ")";
 }
- return s;
+
+ return std::make_pair(castToType, S);
 }
-static std::string GenBuiltinDef(const std::string &name,
- const std::string &proto,
- StringRef typestr, ClassKind ck) {
- std::string s("BUILTIN(__builtin_neon_");
+// (shuffle arg1, arg2, sequence) - emit a __builtin_shufflevector whose mask
+// is computed by evaluating 'sequence' with SetTheory over the sv0..sv63
+// defs, using the local lowhalf/highhalf/rev operators and the
+// MaskExpand expander defined below.
+std::pair<Type, std::string> Intrinsic::emitDagShuffle(DagInit *DI) {
+ // See the documentation in arm_neon.td for a description of these operators.
+ class LowHalf : public SetTheory::Operator {
+ public:
+ virtual void anchor() {}
+ virtual ~LowHalf() {}
+ virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
+ ArrayRef<SMLoc> Loc) {
+ SetTheory::RecSet Elts2;
+ ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
+ Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
+ }
+ };
+ class HighHalf : public SetTheory::Operator {
+ public:
+ virtual void anchor() {}
+ virtual ~HighHalf() {}
+ virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
+ ArrayRef<SMLoc> Loc) {
+ SetTheory::RecSet Elts2;
+ ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
+ Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
+ }
+ };
+ class Rev : public SetTheory::Operator {
+ unsigned ElementSize;
+
+ public:
+ Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
+ virtual void anchor() {}
+ virtual ~Rev() {}
+ virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
+ ArrayRef<SMLoc> Loc) {
+ SetTheory::RecSet Elts2;
+ ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);
+
+ // First operand of (rev ...) is the total vector size in bits; reverse
+ // lanes within each group of VectorSize/ElementSize elements.
+ int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
+ VectorSize /= ElementSize;
+
+ std::vector<Record *> Revved;
+ for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
+ for (int LI = VectorSize - 1; LI >= 0; --LI) {
+ Revved.push_back(Elts2[VI + LI]);
+ }
+ }
- // If all types are the same size, bitcasting the args will take care
- // of arg checking. The actual signedness etc. will be taken care of with
- // special enums.
- if (!ProtoHasScalar(proto))
- ck = ClassB;
+ Elts.insert(Revved.begin(), Revved.end());
+ }
+ };
+ class MaskExpander : public SetTheory::Expander {
+ unsigned N;
+
+ public:
+ MaskExpander(unsigned N) : N(N) {}
+ virtual void anchor() {}
+ virtual ~MaskExpander() {}
+ virtual void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) {
+ // mask0 expands to lanes of the first operand (sv0..svN-1), mask1 to
+ // lanes of the second (svN..sv2N-1).
+ unsigned Addend = 0;
+ if (R->getName() == "mask0")
+ Addend = 0;
+ else if (R->getName() == "mask1")
+ Addend = N;
+ else
+ return;
+ for (unsigned I = 0; I < N; ++I)
+ Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
+ }
+ };
- s += MangleName(name, typestr, ck);
- s += ", \"";
+ // (shuffle arg1, arg2, sequence)
+ std::pair<Type, std::string> Arg1 =
+ emitDagArg(DI->getArg(0), DI->getArgName(0));
+ std::pair<Type, std::string> Arg2 =
+ emitDagArg(DI->getArg(1), DI->getArgName(1));
+ assert_with_loc(Arg1.first == Arg2.first,
+ "Different types in arguments to shuffle!");
+
+ SetTheory ST;
+ LowHalf LH;
+ HighHalf HH;
+ MaskExpander ME(Arg1.first.getNumElements());
+ Rev R(Arg1.first.getElementSizeInBits());
+ SetTheory::RecSet Elts;
+ ST.addOperator("lowhalf", &LH);
+ ST.addOperator("highhalf", &HH);
+ ST.addOperator("rev", &R);
+ ST.addExpander("MaskExpand", &ME);
+ ST.evaluate(DI->getArg(2), Elts, ArrayRef<SMLoc>());
+
+ std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
+ for (auto &E : Elts) {
+ StringRef Name = E->getName();
+ assert_with_loc(Name.startswith("sv"),
+ "Incorrect element kind in shuffle mask!");
+ S += ", " + Name.drop_front(2).str();
+ }
+ S += ")";
+
+ // Recalculate the return type - the shuffle may have halved or doubled it.
+ Type T(Arg1.first);
+ if (Elts.size() > T.getNumElements()) {
+ assert_with_loc(
+ Elts.size() == T.getNumElements() * 2,
+ "Can only double or half the number of elements in a shuffle!");
+ T.doubleLanes();
+ } else if (Elts.size() < T.getNumElements()) {
+ assert_with_loc(
+ Elts.size() == T.getNumElements() / 2,
+ "Can only double or half the number of elements in a shuffle!");
+ T.halveLanes();
+ }
+
+ return std::make_pair(T, S);
+}
- for (unsigned i = 0, e = proto.size(); i != e; ++i)
- s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
+// (dup SCALAR) - broadcast a scalar into every lane of the intrinsic's base
+// vector type using a compound literal: "(T) {s, s, ..., s}".
+std::pair<Type, std::string> Intrinsic::emitDagDup(DagInit *DI) {
+ assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
+ std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
+ assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
- // Extra constant integer to hold type class enum for this function, e.g. s8
- if (ck == ClassB)
- s += "i";
+ Type T = getBaseType();
+ assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
+ std::string S = "(" + T.str() + ") {";
+ for (unsigned I = 0; I < T.getNumElements(); ++I) {
+ if (I != 0)
+ S += ", ";
+ S += A.second;
+ }
+ S += "}";
- s += "\", \"n\")";
- return s;
+ return std::make_pair(T, S);
 }
-static std::string GenIntrinsic(const std::string &name,
- const std::string &proto,
- StringRef outTypeStr, StringRef inTypeStr,
- OpKind kind, ClassKind classKind) {
- assert(!proto.empty() && "");
- bool define = UseMacro(proto, outTypeStr) && kind != OpUnavailable;
- std::string s;
-
- // static always inline + return type
- if (define)
- s += "#define ";
- else
- s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
-
- // Function name with type suffix
- std::string mangledName = MangleName(name, outTypeStr, ClassS);
- if (outTypeStr != inTypeStr) {
- // If the input type is different (e.g., for vreinterpret), append a suffix
- // for the input type. String off a "Q" (quad) prefix so that MangleName
- // does not insert another "q" in the name.
- unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
- StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
- mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
- }
- s += mangledName;
-
- // Function arguments
- s += GenArgs(proto, inTypeStr, name);
-
- // Definition.
- if (define) {
- s += " __extension__ ({ \\\n ";
- s += GenMacroLocals(proto, inTypeStr, name);
- } else if (kind == OpUnavailable) {
- s += " __attribute__((unavailable));\n";
- return s;
- } else
- s += " {\n ";
-
- if (kind != OpNone)
- s += GenOpString(name, kind, proto, outTypeStr);
- else
- s += GenBuiltin(name, proto, outTypeStr, classKind);
- if (define)
- s += " })";
- else
- s += " }";
- s += "\n";
- return s;
+// (splat VEC, LANE) - broadcast one lane of VEC across every lane of the
+// default (base) type by shuffling the vector with itself and repeating the
+// lane index once per destination element.
+std::pair<Type, std::string> Intrinsic::emitDagSplat(DagInit *DI) {
+ assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
+ std::pair<Type, std::string> Vec =
+ emitDagArg(DI->getArg(0), DI->getArgName(0));
+ std::pair<Type, std::string> Idx =
+ emitDagArg(DI->getArg(1), DI->getArgName(1));
+
+ assert_with_loc(Idx.first.isScalar(),
+ "splat() requires a scalar int as the second argument");
+
+ // Select the same source lane for every destination lane.
+ std::string Result =
+ "__builtin_shufflevector(" + Vec.second + ", " + Vec.second;
+ unsigned NumLanes = BaseType.getNumElements();
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
+ Result += ", " + Idx.second;
+ Result += ")";
+
+ return std::make_pair(BaseType, Result);
+}
-/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
-/// is comprised of type definitions and function declarations.
-void NeonEmitter::run(raw_ostream &OS) {
- OS <<
- "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
- "---===\n"
- " *\n"
- " * Permission is hereby granted, free of charge, to any person obtaining "
- "a copy\n"
- " * of this software and associated documentation files (the \"Software\"),"
- " to deal\n"
- " * in the Software without restriction, including without limitation the "
- "rights\n"
- " * to use, copy, modify, merge, publish, distribute, sublicense, "
- "and/or sell\n"
- " * copies of the Software, and to permit persons to whom the Software is\n"
- " * furnished to do so, subject to the following conditions:\n"
- " *\n"
- " * The above copyright notice and this permission notice shall be "
- "included in\n"
- " * all copies or substantial portions of the Software.\n"
- " *\n"
- " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
- "EXPRESS OR\n"
- " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
- "MERCHANTABILITY,\n"
- " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
- "SHALL THE\n"
- " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
- "OTHER\n"
- " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
- "ARISING FROM,\n"
- " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
- "DEALINGS IN\n"
- " * THE SOFTWARE.\n"
- " *\n"
- " *===--------------------------------------------------------------------"
- "---===\n"
- " */\n\n";
+// (save_temp $NAME, VAL) - declare a new local variable $NAME initialized to
+// VAL, registering it in Variables so later DAGs can refer to it. Yields a
+// void result (the declaration statement itself).
+std::pair<Type, std::string> Intrinsic::emitDagSaveTemp(DagInit *DI) {
+ assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
+ std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1));
- OS << "#ifndef __ARM_NEON_H\n";
- OS << "#define __ARM_NEON_H\n\n";
+ assert_with_loc(!A.first.isVoid(),
+ "Argument to save_temp() must have non-void type!");
- OS << "#if !defined(__ARM_NEON)\n";
- OS << "#error \"NEON support not enabled\"\n";
- OS << "#endif\n\n";
+ std::string N = DI->getArgName(0);
+ assert_with_loc(N.size(), "save_temp() expects a name as the first argument");
- OS << "#include <stdint.h>\n\n";
+ assert_with_loc(Variables.find(N) == Variables.end(),
+ "Variable already defined!");
+ Variables[N] = Variable(A.first, N + VariablePostfix);
- // Emit NEON-specific scalar typedefs.
- OS << "typedef float float32_t;\n";
- OS << "typedef __fp16 float16_t;\n";
+ std::string S =
+ A.first.str() + " " + Variables[N].getName() + " = " + A.second;
- OS << "#ifdef __aarch64__\n";
- OS << "typedef double float64_t;\n";
- OS << "#endif\n\n";
+ return std::make_pair(Type::getVoid(), S);
+}
- // For now, signedness of polynomial types depends on target
- OS << "#ifdef __aarch64__\n";
- OS << "typedef uint8_t poly8_t;\n";
- OS << "typedef uint16_t poly16_t;\n";
- OS << "typedef uint64_t poly64_t;\n";
- OS << "typedef __uint128_t poly128_t;\n";
- OS << "#else\n";
- OS << "typedef int8_t poly8_t;\n";
- OS << "typedef int16_t poly16_t;\n";
- OS << "#endif\n";
+// (name_replace FROM, TO) - produce the intrinsic's name with the first
+// occurrence of FROM replaced by TO; asserts if FROM is not present.
+std::pair<Type, std::string> Intrinsic::emitDagNameReplace(DagInit *DI) {
+ std::string S = Name;
- // Emit Neon vector typedefs.
- std::string TypedefTypes(
- "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
- SmallVector<StringRef, 24> TDTypeVec;
- ParseTypes(nullptr, TypedefTypes, TDTypeVec);
+ assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
+ std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
+ std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
- // Emit vector typedefs.
- bool isA64 = false;
- bool preinsert;
- bool postinsert;
- for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
- bool dummy, quad = false, poly = false;
- char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
- preinsert = false;
- postinsert = false;
-
- if (type == 'd' || (type == 'l' && poly)) {
- preinsert = isA64? false: true;
- isA64 = true;
- } else {
- postinsert = isA64? true: false;
- isA64 = false;
- }
- if (postinsert)
- OS << "#endif\n";
- if (preinsert)
- OS << "#ifdef __aarch64__\n";
+ size_t Idx = S.find(ToReplace);
- if (poly)
- OS << "typedef __attribute__((neon_polyvector_type(";
- else
- OS << "typedef __attribute__((neon_vector_type(";
+ assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
+ S.replace(Idx, ToReplace.size(), ReplaceWith);
- unsigned nElts = GetNumElements(TDTypeVec[i], quad);
- OS << utostr(nElts) << "))) ";
- if (nElts < 10)
- OS << " ";
+ return std::make_pair(Type::getVoid(), S);
+}
- OS << TypeString('s', TDTypeVec[i]);
- OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
+// (literal TYPE, VALUE) - emit VALUE verbatim, typed by looking TYPE up as a
+// typedef name (e.g. "int32_t").
+std::pair<Type, std::string> Intrinsic::emitDagLiteral(DagInit *DI) {
+ std::string TypeName =
+ cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
+ std::string Text = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
+ return std::make_pair(Type::fromTypedefName(TypeName), Text);
+}
+// Resolve one DAG argument: either a named variable (looked up in Variables)
+// or a nested DAG, which is recursively emitted via emitDag(). Exactly one of
+// the two forms must be present.
+std::pair<Type, std::string> Intrinsic::emitDagArg(Init *Arg,
+ std::string ArgName) {
+ if (ArgName.size()) {
+ assert_with_loc(!Arg->isComplete(),
+ "Arguments must either be DAGs or names, not both!");
+ assert_with_loc(Variables.find(ArgName) != Variables.end(),
+ "Variable not defined!");
+ Variable &V = Variables[ArgName];
+ return std::make_pair(V.getType(), V.getName());
 }
- postinsert = isA64? true: false;
- if (postinsert)
- OS << "#endif\n";
- OS << "\n";
- // Emit struct typedefs.
- isA64 = false;
- for (unsigned vi = 2; vi != 5; ++vi) {
- for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
- bool dummy, quad = false, poly = false;
- char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
- preinsert = false;
- postinsert = false;
-
- if (type == 'd' || (type == 'l' && poly)) {
- preinsert = isA64? false: true;
- isA64 = true;
- } else {
- postinsert = isA64? true: false;
- isA64 = false;
- }
- if (postinsert)
- OS << "#endif\n";
- if (preinsert)
- OS << "#ifdef __aarch64__\n";
+ assert(Arg && "Neither ArgName nor Arg?!");
+ DagInit *DI = dyn_cast<DagInit>(Arg);
+ assert_with_loc(DI, "Arguments must either be DAGs or names!");
- std::string ts = TypeString('d', TDTypeVec[i]);
- std::string vs = TypeString('0' + vi, TDTypeVec[i]);
- OS << "typedef struct " << vs << " {\n";
- OS << " " << ts << " val";
- OS << "[" << utostr(vi) << "]";
- OS << ";\n} ";
- OS << vs << ";\n";
- OS << "\n";
- }
- }
- postinsert = isA64? true: false;
- if (postinsert)
- OS << "#endif\n";
- OS << "\n";
+ return emitDag(DI);
+}
- OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
+// Produce the full arm_neon.h text for this intrinsic: prototype, shadowed
+// arguments, body and return (or an unavailable attribute). Returns the
+// accumulated OS buffer contents.
+std::string Intrinsic::generate() {
+ CurrentRecord = R;
- std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+ // If we call a macro, our local variables may be corrupted due to
+ // lack of proper lexical scoping. So, add a globally unique postfix
+ // to every variable.
+ //
+ // indexBody() should have set up the Dependencies set by now.
+ for (auto *I : Dependencies)
+ if (I->UseMacro) {
+ VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
+ break;
+ }
- StringMap<ClassKind> EmittedMap;
- std::string CurrentGuard = "";
- bool InGuard = false;
+ initVariables();
- // Some intrinsics are used to express others. These need to be emitted near
- // the beginning so that the declarations are present when needed. This is
- // rather an ugly, arbitrary list, but probably simpler than actually tracking
- // dependency info.
- static const char *EarlyDefsArr[] =
- { "VFMA", "VQMOVN", "VQMOVUN", "VABD", "VMOVL",
- "VABDL", "VGET_HIGH", "VCOMBINE", "VSHLL_N", "VMOVL_HIGH",
- "VMULL", "VMLAL_N", "VMLSL_N", "VMULL_N", "VMULL_P64",
- "VQDMLAL_N", "VQDMLSL_N", "VQDMULL_N" };
- ArrayRef<const char *> EarlyDefs(EarlyDefsArr);
+ emitPrototype();
- for (unsigned i = 0; i < EarlyDefs.size(); ++i) {
- Record *R = Records.getDef(EarlyDefs[i]);
- emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
+ if (IsUnavailable) {
+ OS << " __attribute__((unavailable));";
+ } else {
+ emitOpeningBrace();
+ emitShadowedArgs();
+ emitBody();
+ emitReturn();
+ emitClosingBrace();
 }
+ OS << "\n";
- for (unsigned i = 0, e = RV.size(); i != e; ++i) {
- Record *R = RV[i];
- if (std::find(EarlyDefs.begin(), EarlyDefs.end(), R->getName()) !=
- EarlyDefs.end())
- continue;
+ CurrentRecord = nullptr;
+ return OS.str();
+}
- emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
- }
+// Dry-run the body emission purely for its side effects (populating the
+// Dependencies set and variable info); the generated text is discarded by
+// resetting the OS buffer.
+void Intrinsic::indexBody() {
+ CurrentRecord = R;
- if (InGuard)
- OS << "#endif\n\n";
+ initVariables();
+ emitBody();
+ OS.str("");
- OS << "#undef __ai\n\n";
- OS << "#endif /* __ARM_NEON_H */\n";
+ CurrentRecord = nullptr;
 }
-void NeonEmitter::emitGuardedIntrinsic(raw_ostream &OS, Record *R,
- std::string &CurrentGuard, bool &InGuard,
- StringMap<ClassKind> &EmittedMap) {
+//===----------------------------------------------------------------------===//
+// NeonEmitter implementation
+//===----------------------------------------------------------------------===//
+
+// Overload resolution: find the unique Intrinsic registered under Name whose
+// parameter types exactly match Types. Asserts (with a list of the available
+// overloads) when none or more than one matches.
+Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
+ // First, look up the name in the intrinsic map.
+ assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
+ ("Intrinsic '" + Name + "' not found!").str());
+ std::vector<Intrinsic *> &V = IntrinsicMap[Name.str()];
+ std::vector<Intrinsic *> GoodVec;
+
+ // Create a string to print if we end up failing.
+ std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
+ for (unsigned I = 0; I < Types.size(); ++I) {
+ if (I != 0)
+ ErrMsg += ", ";
+ ErrMsg += Types[I].str();
+ }
+ ErrMsg += ")'\n";
+ ErrMsg += "Available overloads:\n";
+
+ // Now, look through each intrinsic implementation and see if the types are
+ // compatible.
+ for (auto *I : V) {
+ ErrMsg += " - " + I->getReturnType().str() + " " + I->getMangledName();
+ ErrMsg += "(";
+ for (unsigned A = 0; A < I->getNumParams(); ++A) {
+ if (A != 0)
+ ErrMsg += ", ";
+ ErrMsg += I->getParamType(A).str();
+ }
+ ErrMsg += ")\n";
- std::string NewGuard = R->getValueAsString("ArchGuard");
- if (NewGuard != CurrentGuard) {
- if (InGuard)
- OS << "#endif\n\n";
- if (NewGuard.size())
- OS << "#if " << NewGuard << '\n';
+ if (I->getNumParams() != Types.size())
+ continue;
- CurrentGuard = NewGuard;
- InGuard = NewGuard.size() != 0;
+ bool Good = true;
+ for (unsigned Arg = 0; Arg < Types.size(); ++Arg) {
+ if (I->getParamType(Arg) != Types[Arg]) {
+ Good = false;
+ break;
+ }
+ }
+ if (Good)
+ GoodVec.push_back(I);
 }
- emitIntrinsic(OS, R, EmittedMap);
+ assert_with_loc(GoodVec.size() > 0,
+ "No compatible intrinsic found - " + ErrMsg);
+ assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);
+
+ return GoodVec.front();
 }
-/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
-/// intrinsics specified by record R checking for intrinsic uniqueness.
-void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
- StringMap<ClassKind> &EmittedMap) {
- std::string name = R->getValueAsString("Name");
+// Instantiate one Intrinsic per requested (type, source-type) pair of the
+// Inst record R, register each under its name in IntrinsicMap, and append
+// them to Out. With CartesianProductOfTypes, every same-width pair of
+// distinct type specs is generated (used for vreinterpret).
+void NeonEmitter::createIntrinsic(Record *R,
+ SmallVectorImpl<Intrinsic *> &Out) {
+ std::string Name = R->getValueAsString("Name");
 std::string Proto = R->getValueAsString("Prototype");
 std::string Types = R->getValueAsString("Types");
+ Record *OperationRec = R->getValueAsDef("Operation");
+ bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes");
+ std::string Guard = R->getValueAsString("ArchGuard");
+ bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
+
+ // Set the global current record. This allows assert_with_loc to produce
+ // decent location information even when highly nested.
+ CurrentRecord = R;
- SmallVector<StringRef, 16> TypeVec;
- ParseTypes(R, Types, TypeVec);
+ ListInit *Body = OperationRec->getValueAsListInit("Ops");
- OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
+ std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
- ClassKind classKind = ClassNone;
+ ClassKind CK = ClassNone;
 if (R->getSuperClasses().size() >= 2)
- classKind = ClassMap[R->getSuperClasses()[1]];
- if (classKind == ClassNone && kind == OpNone)
- PrintFatalError(R->getLoc(), "Builtin has no class kind");
-
- for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- if (kind == OpReinterpret) {
- bool outQuad = false;
- bool dummy = false;
- (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
- for (unsigned srcti = 0, srcte = TypeVec.size();
- srcti != srcte; ++srcti) {
- bool inQuad = false;
- (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
- if (srcti == ti || inQuad != outQuad)
+ CK = ClassMap[R->getSuperClasses()[1]];
+
+ std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
+ for (auto TS : TypeSpecs) {
+ if (CartesianProductOfTypes) {
+ Type DefaultT(TS, 'd');
+ for (auto SrcTS : TypeSpecs) {
+ Type DefaultSrcT(SrcTS, 'd');
+ if (TS == SrcTS ||
+ DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
 continue;
- std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
- OpCast, ClassS);
- if (EmittedMap.count(s))
- continue;
- EmittedMap[s] = ClassS;
- OS << s;
+ NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
 }
 } else {
- std::string s =
- GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
- if (EmittedMap.count(s)) {
- errs() << "warning: duplicate definition: " << name
- << " (type: " << TypeString('d', TypeVec[ti]) << ")\n";
- continue;
- }
- EmittedMap[s] = classKind;
- OS << s;
+ NewTypeSpecs.push_back(std::make_pair(TS, TS));
 }
 }
- OS << "\n";
-}
-
-static unsigned RangeFromType(const char mod, StringRef typestr) {
- // base type to get the type string for.
- bool quad = false, dummy = false;
- char type = ClassifyType(typestr, quad, dummy, dummy);
- type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
- switch (type) {
- case 'c':
- return (8 << (int)quad) - 1;
- case 'h':
- case 's':
- return (4 << (int)quad) - 1;
- case 'f':
- case 'i':
- return (2 << (int)quad) - 1;
- case 'd':
- case 'l':
- return (1 << (int)quad) - 1;
- case 'k':
- return 0;
- default:
- PrintFatalError("unhandled type!");
- }
-}
+ // Deduplicate the pairs. std::unique only compacts the unique elements to
+ // the front; the leftover tail must be erased, otherwise each duplicate
+ // would create a redundant Intrinsic with an identical signature (and
+ // getIntrinsic would then die with "Multiple overloads found").
+ std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end());
+ NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
+ NewTypeSpecs.end());
-static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
- // base type to get the type string for.
- bool dummy = false;
- char type = ClassifyType(typestr, dummy, dummy, dummy);
- type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
+ for (auto &I : NewTypeSpecs) {
+ Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body,
+ *this, Guard, IsUnavailable);
- switch (type) {
- case 'c':
- return 7;
- case 'h':
- case 's':
- return 15;
- case 'f':
- case 'i':
- return 31;
- case 'd':
- case 'l':
- return 63;
- case 'k':
- return 127;
- default:
- PrintFatalError("unhandled type!");
+ IntrinsicMap[Name].push_back(IT);
+ Out.push_back(IT);
 }
-}
-/// Generate the ARM and AArch64 intrinsic range checking code for
-/// shift/lane immediates, checking for unique declarations.
-void
-NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS) {
- std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
- StringMap<OpKind> EmittedMap;
+ CurrentRecord = nullptr;
+}
- // Generate the intrinsic range checking code for shift/lane immediates.
- OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
+/// declaration of builtins, checking for unique builtin declarations.
+void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
+ SmallVectorImpl<Intrinsic *> &Defs) {
+ OS << "#ifdef GET_NEON_BUILTINS\n";
- for (unsigned i = 0, e = RV.size(); i != e; ++i) {
- Record *R = RV[i];
+ // We only want to emit a builtin once, and we want to emit them in
+ // alphabetical order, so use a std::set.
+ std::set<std::string> Builtins;
- OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
- if (k != OpNone)
+ for (auto *Def : Defs) {
+ if (Def->hasBody())
continue;
-
- std::string name = R->getValueAsString("Name");
- std::string Proto = R->getValueAsString("Prototype");
- std::string Types = R->getValueAsString("Types");
- std::string Rename = name + "@" + Proto;
-
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
- if (Proto.find('a') != std::string::npos)
+ if (Def->hasSplat())
continue;
- // Functions which do not have an immediate do not need to have range
- // checking code emitted.
- size_t immPos = Proto.find('i');
- if (immPos == std::string::npos)
- continue;
+ std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";
- SmallVector<StringRef, 16> TypeVec;
- ParseTypes(R, Types, TypeVec);
-
- if (R->getSuperClasses().size() < 2)
- PrintFatalError(R->getLoc(), "Builtin has no class kind");
-
- ClassKind ck = ClassMap[R->getSuperClasses()[1]];
- if (!ProtoHasScalar(Proto))
- ck = ClassB;
-
- for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- std::string namestr, shiftstr, rangestr;
-
- if (R->getValueAsBit("isVCVT_N")) {
- // VCVT between floating- and fixed-point values takes an immediate
- // in the range [1, 32] for f32, or [1, 64] for f64.
- ck = ClassB;
- if (name.find("32") != std::string::npos)
- rangestr = "l = 1; u = 31"; // upper bound = l + u
- else if (name.find("64") != std::string::npos)
- rangestr = "l = 1; u = 63";
- else
- PrintFatalError(R->getLoc(),
- "Fixed point convert name should contains \"32\" or \"64\"");
-
- } else if (R->getValueAsBit("isScalarShift")) {
- // Right shifts have an 'r' in the name, left shifts do not. Convert
- // instructions have the same bounds and right shifts.
- if (name.find('r') != std::string::npos ||
- name.find("cvt") != std::string::npos)
- rangestr = "l = 1; ";
-
- unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
- // Narrow shift has half the upper bound
- if (R->getValueAsBit("isScalarNarrowShift"))
- upBound /= 2;
-
- rangestr += "u = " + utostr(upBound);
- } else if (R->getValueAsBit("isShift")) {
- // Builtins which are overloaded by type will need to have their upper
- // bound computed at Sema time based on the type constant.
- shiftstr = ", true";
-
- // Right shifts have an 'r' in the name, left shifts do not.
- if (name.find('r') != std::string::npos)
- rangestr = "l = 1; ";
-
- rangestr += "u = RFT(TV" + shiftstr + ")";
- } else if (ck == ClassB) {
- // ClassB intrinsics have a type (and hence lane number) that is only
- // known at runtime.
- assert(immPos > 0 && "unexpected immediate operand");
- if (R->getValueAsBit("isLaneQ"))
- rangestr = "u = RFT(TV, false, true)";
- else
- rangestr = "u = RFT(TV, false, false)";
- } else {
- // The immediate generally refers to a lane in the preceding argument.
- assert(immPos > 0 && "unexpected immediate operand");
- rangestr =
- "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
- }
- // Make sure cases appear only once by uniquing them in a string map.
- namestr = MangleName(name, TypeVec[ti], ck);
- if (EmittedMap.count(namestr))
- continue;
- EmittedMap[namestr] = OpNone;
-
- // Calculate the index of the immediate that should be range checked.
- unsigned immidx = 0;
-
- // Builtins that return a struct of multiple vectors have an extra
- // leading arg for the struct return.
- if (IsMultiVecProto(Proto[0]))
- ++immidx;
-
- // Add one to the index for each argument until we reach the immediate
- // to be checked. Structs of vectors are passed as multiple arguments.
- for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
- switch (Proto[ii]) {
- default:
- immidx += 1;
- break;
- case '2':
- case 'B':
- immidx += 2;
- break;
- case '3':
- case 'C':
- immidx += 3;
- break;
- case '4':
- case 'D':
- immidx += 4;
- break;
- case 'i':
- ie = ii + 1;
- break;
- }
- }
- OS << "case NEON::BI__builtin_neon_";
- OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
- << rangestr << "; break;\n";
- }
+ S += Def->getBuiltinTypeStr();
+ S += "\", \"n\")";
+
+ Builtins.insert(S);
}
+
+ for (auto &S : Builtins)
+ OS << S << "\n";
OS << "#endif\n\n";
}
-struct OverloadInfo {
- uint64_t Mask;
- int PtrArgNum;
- bool HasConstPtr;
-};
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
-void
-NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS) {
- std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
-
- // Generate the overloaded type checking code for SemaChecking.cpp
+void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
+ SmallVectorImpl<Intrinsic *> &Defs) {
OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
// We record each overload check line before emitting because subsequent Inst
// definitions may extend the number of permitted types (i.e. augment the
// Mask). Use std::map to avoid sorting the table by hash number.
+ struct OverloadInfo {
+ uint64_t Mask;
+ int PtrArgNum;
+ bool HasConstPtr;
+ OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
+ };
std::map<std::string, OverloadInfo> OverloadMap;
- typedef std::map<std::string, OverloadInfo>::iterator OverloadIterator;
- for (unsigned i = 0, e = RV.size(); i != e; ++i) {
- Record *R = RV[i];
- OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
- if (k != OpNone)
+ for (auto *Def : Defs) {
+ // If the def has a body (that is, it has Operation DAGs), it won't call
+ // __builtin_neon_* so we don't need to generate a definition for it.
+ if (Def->hasBody())
continue;
-
- std::string Proto = R->getValueAsString("Prototype");
- std::string Types = R->getValueAsString("Types");
- std::string name = R->getValueAsString("Name");
- std::string Rename = name + "@" + Proto;
-
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
- if (Proto.find('a') != std::string::npos)
+ if (Def->hasSplat())
continue;
-
// Functions which have a scalar argument cannot be overloaded, no need to
// check them if we are emitting the type checking code.
- if (ProtoHasScalar(Proto))
+ if (Def->protoHasScalar())
continue;
- SmallVector<StringRef, 16> TypeVec;
- ParseTypes(R, Types, TypeVec);
-
- if (R->getSuperClasses().size() < 2)
- PrintFatalError(R->getLoc(), "Builtin has no class kind");
+ uint64_t Mask = 0ULL;
+ Type Ty = Def->getReturnType();
+ if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' ||
+ Def->getProto()[0] == 'F')
+ Ty = Def->getParamType(0);
+ if (Ty.isPointer())
+ Ty = Def->getParamType(1);
- int si = -1, qi = -1;
- uint64_t mask = 0, qmask = 0;
- for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- // Generate the switch case(s) for this builtin for the type validation.
- bool quad = false, poly = false, usgn = false;
- (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
-
- if (quad) {
- qi = ti;
- qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
- } else {
- si = ti;
- mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
- }
- }
+ Mask |= 1ULL << Ty.getNeonEnum();
- // Check if the builtin function has a pointer or const pointer argument.
+ // Check if the function has a pointer or const pointer argument.
+ std::string Proto = Def->getProto();
int PtrArgNum = -1;
bool HasConstPtr = false;
- for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
- char ArgType = Proto[arg];
+ for (unsigned I = 0; I < Def->getNumParams(); ++I) {
+ char ArgType = Proto[I + 1];
if (ArgType == 'c') {
HasConstPtr = true;
- PtrArgNum = arg - 1;
+ PtrArgNum = I;
break;
}
if (ArgType == 'p') {
- PtrArgNum = arg - 1;
+ PtrArgNum = I;
break;
}
}
// For sret builtins, adjust the pointer argument index.
- if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
+ if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
PtrArgNum += 1;
+ std::string Name = Def->getName();
// Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
// and vst1_lane intrinsics. Using a pointer to the vector element
// type with one of those operations causes codegen to select an aligned
// load/store instruction. If you want an unaligned operation,
// the pointer argument needs to have less alignment than element type,
// so just accept any pointer type.
- if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
+ if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
PtrArgNum = -1;
HasConstPtr = false;
}
- if (mask) {
- std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
- MangleName(name, TypeVec[si], ClassB), OverloadInfo()));
- OverloadInfo &Record = I.first->second;
- if (!I.second)
- assert(Record.PtrArgNum == PtrArgNum &&
- Record.HasConstPtr == HasConstPtr);
- Record.Mask |= mask;
- Record.PtrArgNum = PtrArgNum;
- Record.HasConstPtr = HasConstPtr;
- }
- if (qmask) {
- std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
- MangleName(name, TypeVec[qi], ClassB), OverloadInfo()));
- OverloadInfo &Record = I.first->second;
- if (!I.second)
- assert(Record.PtrArgNum == PtrArgNum &&
- Record.HasConstPtr == HasConstPtr);
- Record.Mask |= qmask;
- Record.PtrArgNum = PtrArgNum;
- Record.HasConstPtr = HasConstPtr;
+ if (Mask) {
+ std::string Name = Def->getMangledName();
+ OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
+ OverloadInfo &OI = OverloadMap[Name];
+ OI.Mask |= Mask;
+ OI.PtrArgNum |= PtrArgNum;
+ OI.HasConstPtr = HasConstPtr;
}
}
- for (OverloadIterator I = OverloadMap.begin(), E = OverloadMap.end(); I != E;
- ++I) {
- OverloadInfo &BuiltinOverloads = I->second;
- OS << "case NEON::BI__builtin_neon_" << I->first << ": ";
- OS << "mask = " << "0x" << utohexstr(BuiltinOverloads.Mask) << "ULL";
- if (BuiltinOverloads.PtrArgNum >= 0)
- OS << "; PtrArgNum = " << BuiltinOverloads.PtrArgNum;
- if (BuiltinOverloads.HasConstPtr)
+ for (auto &I : OverloadMap) {
+ OverloadInfo &OI = I.second;
+
+ OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
+ OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL";
+ if (OI.PtrArgNum >= 0)
+ OS << "; PtrArgNum = " << OI.PtrArgNum;
+ if (OI.HasConstPtr)
OS << "; HasConstPtr = true";
OS << "; break;\n";
}
-
OS << "#endif\n\n";
}
-/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
-/// declaration of builtins, checking for unique builtin declarations.
-void NeonEmitter::genBuiltinsDef(raw_ostream &OS) {
- std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+void
+NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
+ SmallVectorImpl<Intrinsic *> &Defs) {
+ OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
- // We want to emit the intrinsics in alphabetical order, so use the more
- // expensive std::map to gather them together first.
- std::map<std::string, OpKind> EmittedMap;
+ std::set<std::string> Emitted;
- for (unsigned i = 0, e = RV.size(); i != e; ++i) {
- Record *R = RV[i];
- OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
- if (k != OpNone)
+ for (auto *Def : Defs) {
+ if (Def->hasBody())
continue;
-
- std::string Proto = R->getValueAsString("Prototype");
- std::string name = R->getValueAsString("Name");
- std::string Rename = name + "@" + Proto;
-
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
- if (Proto.find('a') != std::string::npos)
+ if (Def->hasSplat())
+ continue;
+ // Functions which do not have an immediate do not need to have range
+ // checking code emitted.
+ if (!Def->hasImmediate())
+ continue;
+ if (Emitted.find(Def->getMangledName()) != Emitted.end())
continue;
- std::string Types = R->getValueAsString("Types");
- SmallVector<StringRef, 16> TypeVec;
- ParseTypes(R, Types, TypeVec);
+ std::string LowerBound, UpperBound;
- if (R->getSuperClasses().size() < 2)
- PrintFatalError(R->getLoc(), "Builtin has no class kind");
+ Record *R = Def->getRecord();
+ if (R->getValueAsBit("isVCVT_N")) {
+ // VCVT between floating- and fixed-point values takes an immediate
+ // in the range [1, 32] for f32, or [1, 64] for f64.
+ LowerBound = "1";
+ if (Def->getBaseType().getElementSizeInBits() == 32)
+ UpperBound = "31";
+ else
+ UpperBound = "63";
+ } else if (R->getValueAsBit("isScalarShift")) {
+ // Right shifts have an 'r' in the name, left shifts do not. Convert
+ // instructions have the same bounds and right shifts.
+ if (Def->getName().find('r') != std::string::npos ||
+ Def->getName().find("cvt") != std::string::npos)
+ LowerBound = "1";
+
+ UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
+ } else if (R->getValueAsBit("isShift")) {
+ // Builtins which are overloaded by type will need to have their upper
+ // bound computed at Sema time based on the type constant.
+
+ // Right shifts have an 'r' in the name, left shifts do not.
+ if (Def->getName().find('r') != std::string::npos)
+ LowerBound = "1";
+ UpperBound = "RFT(TV, true)";
+ } else if (Def->getClassKind(true) == ClassB) {
+ // ClassB intrinsics have a type (and hence lane number) that is only
+ // known at runtime.
+ if (R->getValueAsBit("isLaneQ"))
+ UpperBound = "RFT(TV, false, true)";
+ else
+ UpperBound = "RFT(TV, false, false)";
+ } else {
+ // The immediate generally refers to a lane in the preceding argument.
+ assert(Def->getImmediateIdx() > 0);
+ Type T = Def->getParamType(Def->getImmediateIdx() - 1);
+ UpperBound = utostr(T.getNumElements() - 1);
+ }
- ClassKind ck = ClassMap[R->getSuperClasses()[1]];
+ // Calculate the index of the immediate that should be range checked.
+ unsigned Idx = Def->getNumParams();
+ if (Def->hasImmediate())
+ Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
- for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- // Generate the declaration for this builtin, ensuring
- // that each unique BUILTIN() macro appears only once in the output
- // stream.
- std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
- if (EmittedMap.count(bd))
- continue;
+ OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
+ << "i = " << Idx << ";";
+ if (LowerBound.size())
+ OS << " l = " << LowerBound << ";";
+ if (UpperBound.size())
+ OS << " u = " << UpperBound << ";";
+ OS << " break;\n";
- EmittedMap[bd] = OpNone;
- }
+ Emitted.insert(Def->getMangledName());
}
- // Generate BuiltinsNEON.
- OS << "#ifdef GET_NEON_BUILTINS\n";
-
- for (std::map<std::string, OpKind>::iterator I = EmittedMap.begin(),
- E = EmittedMap.end();
- I != E; ++I)
- OS << I->first << "\n";
-
OS << "#endif\n\n";
}
void NeonEmitter::runHeader(raw_ostream &OS) {
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<Intrinsic *, 128> Defs;
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
// Generate shared BuiltinsXXX.def
- genBuiltinsDef(OS);
+ genBuiltinsDef(OS, Defs);
// Generate ARM overloaded type checking code for SemaChecking.cpp
- genOverloadTypeCheckCode(OS);
+ genOverloadTypeCheckCode(OS, Defs);
// Generate ARM range checking code for shift/lane immediates.
- genIntrinsicRangeCheckCode(OS);
+ genIntrinsicRangeCheckCode(OS, Defs);
}
-/// GenTest - Write out a test for the intrinsic specified by the name and
-/// type strings, including the embedded patterns for FileCheck to match.
-static std::string GenTest(const std::string &name,
- const std::string &proto,
- StringRef outTypeStr, StringRef inTypeStr,
- bool isShift, bool isHiddenLOp,
- ClassKind ck, const std::string &InstName,
- bool isA64,
- std::string & testFuncProto) {
- assert(!proto.empty() && "");
- std::string s;
-
- // Function name with type suffix
- std::string mangledName = MangleName(name, outTypeStr, ClassS);
- if (outTypeStr != inTypeStr) {
- // If the input type is different (e.g., for vreinterpret), append a suffix
- // for the input type. String off a "Q" (quad) prefix so that MangleName
- // does not insert another "q" in the name.
- unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
- StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
- mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
- }
-
- // todo: GenerateChecksForIntrinsic does not generate CHECK
- // for aarch64 instructions yet
- std::vector<std::string> FileCheckPatterns;
- if (!isA64) {
- GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
- isHiddenLOp, FileCheckPatterns);
- s+= "// CHECK_ARM: test_" + mangledName + "\n";
- }
- s += "// CHECK_AARCH64: test_" + mangledName + "\n";
-
- // Emit the FileCheck patterns.
- // If for any reason we do not want to emit a check, mangledInst
- // will be the empty string.
- if (FileCheckPatterns.size()) {
- for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
- e = FileCheckPatterns.end();
- i != e;
- ++i) {
- s += "// CHECK_ARM: " + *i + "\n";
+/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
+/// is comprised of type definitions and function declarations.
+void NeonEmitter::run(raw_ostream &OS) {
+ OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
+ "------------------------------"
+ "---===\n"
+ " *\n"
+ " * Permission is hereby granted, free of charge, to any person "
+ "obtaining "
+ "a copy\n"
+ " * of this software and associated documentation files (the "
+ "\"Software\"),"
+ " to deal\n"
+ " * in the Software without restriction, including without limitation "
+ "the "
+ "rights\n"
+ " * to use, copy, modify, merge, publish, distribute, sublicense, "
+ "and/or sell\n"
+ " * copies of the Software, and to permit persons to whom the Software "
+ "is\n"
+ " * furnished to do so, subject to the following conditions:\n"
+ " *\n"
+ " * The above copyright notice and this permission notice shall be "
+ "included in\n"
+ " * all copies or substantial portions of the Software.\n"
+ " *\n"
+ " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
+ "EXPRESS OR\n"
+ " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
+ "MERCHANTABILITY,\n"
+ " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
+ "SHALL THE\n"
+ " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
+ "OTHER\n"
+ " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
+ "ARISING FROM,\n"
+ " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
+ "DEALINGS IN\n"
+ " * THE SOFTWARE.\n"
+ " *\n"
+ " *===-----------------------------------------------------------------"
+ "---"
+ "---===\n"
+ " */\n\n";
+
+ OS << "#ifndef __ARM_NEON_H\n";
+ OS << "#define __ARM_NEON_H\n\n";
+
+ OS << "#if !defined(__ARM_NEON)\n";
+ OS << "#error \"NEON support not enabled\"\n";
+ OS << "#endif\n\n";
+
+ OS << "#include <stdint.h>\n\n";
+
+ // Emit NEON-specific scalar typedefs.
+ OS << "typedef float float32_t;\n";
+ OS << "typedef __fp16 float16_t;\n";
+
+ OS << "#ifdef __aarch64__\n";
+ OS << "typedef double float64_t;\n";
+ OS << "#endif\n\n";
+
+ // For now, signedness of polynomial types depends on target
+ OS << "#ifdef __aarch64__\n";
+ OS << "typedef uint8_t poly8_t;\n";
+ OS << "typedef uint16_t poly16_t;\n";
+ OS << "typedef uint64_t poly64_t;\n";
+ OS << "typedef __uint128_t poly128_t;\n";
+ OS << "#else\n";
+ OS << "typedef int8_t poly8_t;\n";
+ OS << "typedef int16_t poly16_t;\n";
+ OS << "#endif\n";
+
+ // Emit Neon vector typedefs.
+ std::string TypedefTypes(
+ "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
+ std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
+
+ // Emit vector typedefs.
+ bool InIfdef = false;
+ for (auto &TS : TDTypeVec) {
+ bool IsA64 = false;
+ Type T(TS, 'd');
+ if (T.isDouble() || (T.isPoly() && T.isLong()))
+ IsA64 = true;
+
+ if (InIfdef && !IsA64) {
+ OS << "#endif\n";
+ InIfdef = false;
+ }
+ if (!InIfdef && IsA64) {
+ OS << "#ifdef __aarch64__\n";
+ InIfdef = true;
}
+
+ if (T.isPoly())
+ OS << "typedef __attribute__((neon_polyvector_type(";
+ else
+ OS << "typedef __attribute__((neon_vector_type(";
+
+ Type T2 = T;
+ T2.makeScalar();
+ OS << utostr(T.getNumElements()) << "))) ";
+ OS << T2.str();
+ OS << " " << T.str() << ";\n";
}
+ if (InIfdef)
+ OS << "#endif\n";
+ OS << "\n";
- // Emit the start of the test function.
+ // Emit struct typedefs.
+ InIfdef = false;
+ for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
+ for (auto &TS : TDTypeVec) {
+ bool IsA64 = false;
+ Type T(TS, 'd');
+ if (T.isDouble() || (T.isPoly() && T.isLong()))
+ IsA64 = true;
+
+ if (InIfdef && !IsA64) {
+ OS << "#endif\n";
+ InIfdef = false;
+ }
+ if (!InIfdef && IsA64) {
+ OS << "#ifdef __aarch64__\n";
+ InIfdef = true;
+ }
- testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
- char arg = 'a';
- std::string comma;
- for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
- // Do not create arguments for values that must be immediate constants.
- if (proto[i] == 'i')
- continue;
- testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
- testFuncProto.push_back(arg);
- comma = ", ";
- }
- testFuncProto += ")";
-
- s+= testFuncProto;
- s+= " {\n ";
-
- if (proto[0] != 'v')
- s += "return ";
- s += mangledName + "(";
- arg = 'a';
- for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
- if (proto[i] == 'i') {
- // For immediate operands, test the maximum value.
- if (isShift)
- s += "1"; // FIXME
- else
- // The immediate generally refers to a lane in the preceding argument.
- s += utostr(RangeFromType(proto[i-1], inTypeStr));
- } else {
- s.push_back(arg);
+ char M = '2' + (NumMembers - 2);
+ Type VT(TS, M);
+ OS << "typedef struct " << VT.str() << " {\n";
+ OS << " " << T.str() << " val";
+ OS << "[" << utostr(NumMembers) << "]";
+ OS << ";\n} ";
+ OS << VT.str() << ";\n";
+ OS << "\n";
}
- if ((i + 1) < e)
- s += ", ";
}
- s += ");\n}\n\n";
- return s;
-}
+ if (InIfdef)
+ OS << "#endif\n";
+ OS << "\n";
-/// Write out all intrinsic tests for the specified target, checking
-/// for intrinsic test uniqueness.
-void NeonEmitter::genTargetTest(raw_ostream &OS) {
- StringMap<OpKind> EmittedMap;
- std::string CurrentGuard = "";
- bool InGuard = false;
+ OS << "#define __ai static inline __attribute__((__always_inline__, "
+ "__nodebug__))\n\n";
+ SmallVector<Intrinsic *, 128> Defs;
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
- for (unsigned i = 0, e = RV.size(); i != e; ++i) {
- Record *R = RV[i];
- std::string name = R->getValueAsString("Name");
- std::string Proto = R->getValueAsString("Prototype");
- std::string Types = R->getValueAsString("Types");
- bool isShift = R->getValueAsBit("isShift");
- std::string InstName = R->getValueAsString("InstName");
- bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
-
- std::string NewGuard = R->getValueAsString("ArchGuard");
- if (NewGuard != CurrentGuard) {
- if (InGuard)
- OS << "#endif\n\n";
- if (NewGuard.size())
- OS << "#if " << NewGuard << '\n';
-
- CurrentGuard = NewGuard;
- InGuard = NewGuard.size() != 0;
- }
-
- SmallVector<StringRef, 16> TypeVec;
- ParseTypes(R, Types, TypeVec);
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+ for (auto *I : Defs)
+ I->indexBody();
+
+ std::stable_sort(
+ Defs.begin(), Defs.end(),
+ [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });
+
+ // Only emit a def when its requirements have been met.
+ // FIXME: This loop could be made faster, but it's fast enough for now.
+ bool MadeProgress = true;
+ std::string InGuard = "";
+ while (!Defs.empty() && MadeProgress) {
+ MadeProgress = false;
+
+ for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
+ I != Defs.end(); /*No step*/) {
+ bool DependenciesSatisfied = true;
+ for (auto *II : (*I)->getDependencies()) {
+ if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
+ DependenciesSatisfied = false;
+ }
+ if (!DependenciesSatisfied) {
+ // Try the next one.
+ ++I;
+ continue;
+ }
- ClassKind ck = ClassMap[R->getSuperClasses()[1]];
- OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
- if (kind == OpUnavailable)
- continue;
- for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- if (kind == OpReinterpret) {
- bool outQuad = false;
- bool dummy = false;
- (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
- for (unsigned srcti = 0, srcte = TypeVec.size();
- srcti != srcte; ++srcti) {
- bool inQuad = false;
- (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
- if (srcti == ti || inQuad != outQuad)
- continue;
- std::string testFuncProto;
- std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
- isShift, isHiddenLOp, ck, InstName,
- CurrentGuard.size(), testFuncProto);
- if (EmittedMap.count(testFuncProto))
- continue;
- EmittedMap[testFuncProto] = kind;
- OS << s << "\n";
- }
- } else {
- std::string testFuncProto;
- std::string s =
- GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, isHiddenLOp,
- ck, InstName, CurrentGuard.size(), testFuncProto);
- OS << s << "\n";
+ // Emit #endif/#if pair if needed.
+ if ((*I)->getGuard() != InGuard) {
+ if (!InGuard.empty())
+ OS << "#endif\n";
+ InGuard = (*I)->getGuard();
+ if (!InGuard.empty())
+ OS << "#if " << InGuard << "\n";
}
+
+ // Actually generate the intrinsic code.
+ OS << (*I)->generate();
+
+ MadeProgress = true;
+ I = Defs.erase(I);
}
}
-
- if (InGuard)
+ assert(Defs.empty() && "Some requirements were not satisfied!");
+ if (!InGuard.empty())
OS << "#endif\n";
-}
-/// runTests - Write out a complete set of tests for all of the Neon
-/// intrinsics.
-void NeonEmitter::runTests(raw_ostream &OS) {
- OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
- "apcs-gnu\\\n"
- "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
- "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n"
- "\n"
- "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
- "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n"
- "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n"
- "\n"
- "// REQUIRES: long_tests\n"
- "\n"
- "#include <arm_neon.h>\n"
- "\n";
-
- genTargetTest(OS);
+
+ OS << "\n";
+ OS << "#undef __ai\n\n";
+ OS << "#endif /* __ARM_NEON_H */\n";
}
namespace clang {
NeonEmitter(Records).runHeader(OS);
}
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
- NeonEmitter(Records).runTests(OS);
+ assert(0 && "Neon test generation no longer implemented!");
}
} // End namespace clang