def OP_SCALAR_MULX_LNQ : Op;
def OP_SCALAR_VMULX_LN : Op;
def OP_SCALAR_VMULX_LNQ : Op;
+def OP_SCALAR_QDMULL_LN : Op;
+def OP_SCALAR_QDMULL_LNQ : Op;
+def OP_SCALAR_QDMULH_LN : Op;
+def OP_SCALAR_QDMULH_LNQ : Op;
+def OP_SCALAR_QRDMULH_LN : Op;
+def OP_SCALAR_QRDMULH_LNQ : Op;
class Inst <string n, string p, string t, Op o> {
string Name = n;
// Scalar Floating Point fused multiply-subtract (scalar, by element)
def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "sssdi", "SfSd", OP_FMS_LN>;
def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>;
+
+// Signed Saturating Doubling Multiply Long (scalar by element)
+def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "rsdi", "SsSi", OP_SCALAR_QDMULL_LN>;
+def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LNQ>;
+
+// Signed Saturating Doubling Multiply-Add Long (scalar by element)
+def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "rrsdi", "SsSi">;
+def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "rrsji", "SsSi">;
+
+// Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
+def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "rrsdi", "SsSi">;
+def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "rrsji", "SsSi">;
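+// Note: the vqdmlal/vqdmlsl lane definitions above use plain SInst (no Op
+// kind), so their lane'd expansion is emitted directly in CGBuiltin.cpp
+// (see the builtin handling below) rather than through a NeonEmitter OpKind.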
+
+// Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
+def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QDMULH_LN>;
+def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LNQ>;
+
+// Scalar Integer Saturating Rounding Doubling Multiply Half High (scalar by element)
+def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>;
+def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LNQ>;
+
}
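For reference, the user-visible prototypes these definitions are intended to
produce (inferred from the tests further below; the "lane" parameter name is
illustrative and must be a constant index) look roughly like:

  int32_t vqdmullh_lane_s16(int16_t a, int16x4_t b, const int lane);
  int64_t vqdmulls_lane_s32(int32_t a, int32x2_t b, const int lane);
  int16_t vqdmulhh_lane_s16(int16_t a, int16x4_t b, const int lane);
  int32_t vqdmulhs_lane_s32(int32_t a, int32x2_t b, const int lane);
  int16_t vqrdmulhh_lane_s16(int16_t a, int16x4_t b, const int lane);
  int32_t vqrdmulhs_lane_s32(int32_t a, int32x2_t b, const int lane);
  int32_t vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c, const int lane);
  int64_t vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c, const int lane);
  int32_t vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c, const int lane);
  int64_t vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c, const int lane);
  // The _laneq variants take the 128-bit vector types (int16x8_t, int32x4_t).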
// argument that specifies the vector type, need to handle each case.
switch (BuiltinID) {
default: break;
+ case AArch64::BI__builtin_neon_vqdmlalh_lane_s16:
+ case AArch64::BI__builtin_neon_vqdmlalh_laneq_s16:
+ case AArch64::BI__builtin_neon_vqdmlals_lane_s32:
+ case AArch64::BI__builtin_neon_vqdmlals_laneq_s32:
+ case AArch64::BI__builtin_neon_vqdmlslh_lane_s16:
+ case AArch64::BI__builtin_neon_vqdmlslh_laneq_s16:
+ case AArch64::BI__builtin_neon_vqdmlsls_lane_s32:
+ case AArch64::BI__builtin_neon_vqdmlsls_laneq_s32: {
+ Int = Intrinsic::arm_neon_vqadds;
+ if (BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_lane_s16 ||
+ BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 ||
+ BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_lane_s32 ||
+ BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_laneq_s32) {
+ Int = Intrinsic::arm_neon_vqsubs;
+ }
+ // Create the vqdmull call computing b * c[i].
+ llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType());
+ llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1);
+ Ty = CGF.ConvertType(E->getArg(0)->getType());
+ llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1);
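+ // The vqdmull and vqadds/vqsubs intrinsics are defined on vector types, so
+ // OpVTy/ResVTy wrap the scalar operand and result types in one-element
+ // vectors; the scalar values are inserted into lane 0 below.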
+ Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy);
+ Value *V = UndefValue::get(OpVTy);
+ llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0);
+ SmallVector<Value *, 2> MulOps;
+ MulOps.push_back(Ops[1]);
+ MulOps.push_back(Ops[2]);
+ MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI);
+ MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract");
+ MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI);
+ Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]);
+ // Create the vqadds/vqsubs call computing a +/- the vqdmull result.
+ F = CGF.CGM.getIntrinsic(Int, ResVTy);
+ SmallVector<Value *, 2> AddOps;
+ AddOps.push_back(Ops[0]);
+ AddOps.push_back(MulRes);
+ V = UndefValue::get(ResVTy);
+ AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI);
+ Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]);
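+ // The saturating add/sub produces a one-element vector; bitcast it back to
+ // the scalar result type.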
+ return CGF.Builder.CreateBitCast(AddRes, Ty);
+ }
case AArch64::BI__builtin_neon_vfmas_lane_f32:
case AArch64::BI__builtin_neon_vfmas_laneq_f32:
case AArch64::BI__builtin_neon_vfmad_lane_f64:
case AArch64::BI__builtin_neon_vfmad_laneq_f64: {
llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
- // extract lane acc += x * v[i]
Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
}
case AArch64::BI__builtin_neon_vqaddh_s16:
case AArch64::BI__builtin_neon_vqadds_s32:
case AArch64::BI__builtin_neon_vqaddd_s64:
- Int = Intrinsic::aarch64_neon_vqadds;
+ Int = Intrinsic::arm_neon_vqadds;
s = "vqadds"; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqaddb_u8:
case AArch64::BI__builtin_neon_vqaddh_u16:
case AArch64::BI__builtin_neon_vqadds_u32:
case AArch64::BI__builtin_neon_vqaddd_u64:
- Int = Intrinsic::aarch64_neon_vqaddu;
+ Int = Intrinsic::arm_neon_vqaddu;
s = "vqaddu"; OverloadInt = true; break;
// Scalar Saturating Sub
case AArch64::BI__builtin_neon_vqsubb_s8:
case AArch64::BI__builtin_neon_vqsubh_s16:
case AArch64::BI__builtin_neon_vqsubs_s32:
case AArch64::BI__builtin_neon_vqsubd_s64:
- Int = Intrinsic::aarch64_neon_vqsubs;
+ Int = Intrinsic::arm_neon_vqsubs;
s = "vqsubs"; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqsubb_u8:
case AArch64::BI__builtin_neon_vqsubh_u16:
case AArch64::BI__builtin_neon_vqsubs_u32:
case AArch64::BI__builtin_neon_vqsubd_u64:
- Int = Intrinsic::aarch64_neon_vqsubu;
+ Int = Intrinsic::arm_neon_vqsubu;
s = "vqsubu"; OverloadInt = true; break;
// Scalar Shift Left
case AArch64::BI__builtin_neon_vshld_s64:
// Signed Saturating Doubling Multiply Long
case AArch64::BI__builtin_neon_vqdmullh_s16:
case AArch64::BI__builtin_neon_vqdmulls_s32:
- Int = Intrinsic::aarch64_neon_vqdmull;
+ Int = Intrinsic::arm_neon_vqdmull;
s = "vqdmull"; OverloadWideInt = true; break;
// Scalar Signed Saturating Extract Unsigned Narrow
case AArch64::BI__builtin_neon_vqmovunh_s16:
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
}
-// CHECK_AARCH64: test_vmulx_lane_f64
+// CHECK: test_vmulx_lane_f64
float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
return vmulx_lane_f64(a, b, 0);
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
}
-// CHECK_AARCH64: test_vmulx_laneq_f64_0
+// CHECK: test_vmulx_laneq_f64_0
float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
return vmulx_laneq_f64(a, b, 0);
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
}
-// CHECK_AARCH64: test_vmulx_laneq_f64_1
+// CHECK: test_vmulx_laneq_f64_1
float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
return vmulx_laneq_f64(a, b, 1);
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
}
-// CHECK_AARCH64: test_vfmas_lane_f32
+// CHECK: test_vfmas_lane_f32
float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
return vfmas_lane_f32(a, b, c, 1);
// CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
}
-// CHECK_AARCH64: test_vfmad_lane_f64
+// CHECK: test_vfmad_lane_f64
float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
return vfmad_lane_f64(a, b, c, 0);
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
}
-// CHECK_AARCH64: test_vfmad_laneq_f64
+// CHECK: test_vfmad_laneq_f64
float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
return vfmad_laneq_f64(a, b, c, 1);
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
}
-// CHECK_AARCH64: test_vfmss_lane_f32
+// CHECK: test_vfmss_lane_f32
float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
return vfmss_lane_f32(a, b, c, 1);
// CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
}
-// CHECK_AARCH64: test_vfma_lane_f64
+// CHECK: test_vfma_lane_f64
float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfma_lane_f64(a, b, v, 0);
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
}
-// CHECK_AARCH64: test_vfms_lane_f64
+// CHECK: test_vfms_lane_f64
float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfms_lane_f64(a, b, v, 0);
// CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
}
-// CHECK_AARCH64: test_vfma_laneq_f64
+// CHECK: test_vfma_laneq_f64
float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfma_laneq_f64(a, b, v, 0);
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
}
-// CHECK_AARCH64: test_vfms_laneq_f64
+// CHECK: test_vfms_laneq_f64
float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfms_laneq_f64(a, b, v, 0);
// CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
}
+// CHECK: test_vqdmullh_lane_s16
+int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) {
+ return vqdmullh_lane_s16(a, b, 3);
+ // CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
+}
+
+// CHECK: test_vqdmulls_lane_s32
+int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
+ return vqdmulls_lane_s32(a, b, 1);
+ // CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+// CHECK: test_vqdmullh_laneq_s16
+int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) {
+ return vqdmullh_laneq_s16(a, b, 7);
+ // CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
+}
+
+// CHECK: test_vqdmulls_laneq_s32
+int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
+ return vqdmulls_laneq_s32(a, b, 3);
+ // CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+}
+
+// CHECK: test_vqdmulhh_lane_s16
+int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) {
+ return vqdmulhh_lane_s16(a, b, 3);
+// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
+}
+
+// CHECK: test_vqdmulhs_lane_s32
+int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
+ return vqdmulhs_lane_s32(a, b, 1);
+// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+// CHECK: test_vqdmulhh_laneq_s16
+int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) {
+ return vqdmulhh_laneq_s16(a, b, 7);
+// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
+}
+
+// CHECK: test_vqdmulhs_laneq_s32
+int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
+ return vqdmulhs_laneq_s32(a, b, 3);
+// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+}
+
+// CHECK: test_vqrdmulhh_lane_s16
+int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) {
+ return vqrdmulhh_lane_s16(a, b, 3);
+// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
+}
+
+// CHECK: test_vqrdmulhs_lane_s32
+int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
+ return vqrdmulhs_lane_s32(a, b, 1);
+// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+// CHECK: test_vqrdmulhh_laneq_s16
+int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
+ return vqrdmulhh_laneq_s16(a, b, 7);
+// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
+}
+
+// CHECK: test_vqrdmulhs_laneq_s32
+int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
+ return vqrdmulhs_laneq_s32(a, b, 3);
+// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+}
+
+// CHECK: test_vqdmlalh_lane_s16
+int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
+ return vqdmlalh_lane_s16(a, b, c, 3);
+// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
+}
+
+// CHECK: test_vqdmlals_lane_s32
+int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) {
+ return vqdmlals_lane_s32(a, b, c, 1);
+// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+// CHECK: test_vqdmlalh_laneq_s16
+int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
+ return vqdmlalh_laneq_s16(a, b, c, 7);
+// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
+}
+
+// CHECK: test_vqdmlals_laneq_s32
+int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
+ return vqdmlals_laneq_s32(a, b, c, 3);
+// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+}
+
+// CHECK: test_vqdmlslh_lane_s16
+int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
+ return vqdmlslh_lane_s16(a, b, c, 3);
+// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
+}
+
+// CHECK: test_vqdmlsls_lane_s32
+int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) {
+ return vqdmlsls_lane_s32(a, b, c, 1);
+// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+// CHECK: test_vqdmlslh_laneq_s16
+int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
+ return vqdmlslh_laneq_s16(a, b, c, 7);
+// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
+}
+
+// CHECK: test_vqdmlsls_laneq_s32
+int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
+ return vqdmlsls_laneq_s32(a, b, c, 3);
+// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+}
+
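As a side note, a rough scalar reference model of the saturating-doubling
arithmetic exercised by the tests above is sketched below. It is only an
illustration of the intended semantics (the sat_s32/sat_s16 helpers and ref_*
names are hypothetical, not part of the patch or of arm_neon.h):

  #include <cstdint>

  // Hypothetical helpers: clamp a wider intermediate to the destination range.
  static int64_t sat_s32(int64_t x) {
    return x > INT32_MAX ? INT32_MAX : (x < INT32_MIN ? INT32_MIN : x);
  }
  static int32_t sat_s16(int32_t x) {
    return x > INT16_MAX ? INT16_MAX : (x < INT16_MIN ? INT16_MIN : x);
  }

  // vqdmullh_lane_s16(a, v, i): double the product with the selected lane
  // element and saturate into the wider result type.
  static int32_t ref_vqdmullh(int16_t a, int16_t v_lane) {
    return (int32_t)sat_s32(2 * (int64_t)a * v_lane);
  }

  // vqdmlalh/vqdmlslh: the doubled product is saturated, then added to (or
  // subtracted from) the accumulator with a second saturation, matching the
  // vqdmull + vqadds/vqsubs lowering in the CGBuiltin change above.
  static int32_t ref_vqdmlalh(int32_t acc, int16_t b, int16_t v_lane) {
    return (int32_t)sat_s32((int64_t)acc + sat_s32(2 * (int64_t)b * v_lane));
  }
  static int32_t ref_vqdmlslh(int32_t acc, int16_t b, int16_t v_lane) {
    return (int32_t)sat_s32((int64_t)acc - sat_s32(2 * (int64_t)b * v_lane));
  }

  // vqdmulhh/vqrdmulhh: high half of the doubled product, optionally rounded,
  // saturated back to 16 bits.
  static int16_t ref_vqdmulhh(int16_t a, int16_t v_lane) {
    return (int16_t)sat_s16((int32_t)((2 * (int64_t)a * v_lane) >> 16));
  }
  static int16_t ref_vqrdmulhh(int16_t a, int16_t v_lane) {
    return (int16_t)sat_s16((int32_t)((2 * (int64_t)a * v_lane + (1 << 15)) >> 16));
  }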
OpScalarMulXLane,
OpScalarMulXLaneQ,
OpScalarVMulXLane,
- OpScalarVMulXLaneQ
+ OpScalarVMulXLaneQ,
+ OpScalarQDMullLane,
+ OpScalarQDMullLaneQ,
+ OpScalarQDMulHiLane,
+ OpScalarQDMulHiLaneQ,
+ OpScalarQRDMulHiLane,
+ OpScalarQRDMulHiLaneQ
};
enum ClassKind {
OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
+ OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
+ OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
+ OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
+ OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
+ OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
+ OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
+
Record *SI = R.getClass("SInst");
Record *II = R.getClass("IInst");
case OpScalarMulLane: {
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
"(__b, __c);\\\n __a * __d1;";
break;
}
case OpScalarMulLaneQ: {
" vset_lane_" + typeCode + "(__f1, __g1, 0);";
break;
}
-
+ case OpScalarQDMullLane: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
+ "vget_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
+ case OpScalarQDMullLaneQ: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
+ "vgetq_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
+ case OpScalarQDMulHiLane: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
+ "vget_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
+ case OpScalarQDMulHiLaneQ: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
+ "vgetq_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
+ case OpScalarQRDMulHiLane: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
+ "vget_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
+ case OpScalarQRDMulHiLaneQ: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
+ "vgetq_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
default:
PrintFatalError("unknown OpKind!");
}
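Spliced into the generated arm_neon.h definitions, the case bodies above yield
expansions equivalent to the following hand-written macros (a sketch only: the
actual wrapper text and argument shadowing come from NeonEmitter, the my_*
names are illustrative, and the underlying scalar intrinsics such as
vqdmullh_s16/vqdmulhh_s16 are assumed to be available):

  #define my_vqdmullh_lane_s16(a, b, lane) \
    vqdmullh_s16((a), vget_lane_s16((b), (lane)))      /* OpScalarQDMullLane   */
  #define my_vqdmulhh_laneq_s16(a, b, lane) \
    vqdmulhh_s16((a), vgetq_lane_s16((b), (lane)))     /* OpScalarQDMulHiLaneQ */
  #define my_vqrdmulhs_lane_s32(a, b, lane) \
    vqrdmulhs_s32((a), vget_lane_s32((b), (lane)))     /* OpScalarQRDMulHiLane */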