Poly8,
Poly16,
Poly64,
+ Poly128,
Float16,
Float32,
Float64
def OP_MLA : Op;
def OP_MLAL : Op;
def OP_MULLHi : Op;
+def OP_MULLHi_P64 : Op;
def OP_MULLHi_N : Op;
def OP_MLALHi : Op;
def OP_MLALHi_N : Op;
// s: short
// i: int
// l: long
+// k: 128-bit long
// f: float
// h: half-float
// d: double
def LD4_DUP : WInst<"vld4_dup", "4c",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def VLDRQ : WInst<"vldrq", "sc", "Pk">;
+def VSTRQ : WInst<"vstrq", "vps", "Pk">;
+
////////////////////////////////////////////////////////////////////////////////
// Addition
// With additional d, Qd type.
def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "wwks", "si", OP_QDMLALHi_N>;
def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "wwks", "si", OP_QDMLSLHi_N>;
+def VMULL_P64 : SInst<"vmull", "rss", "Pl">;
+def VMULL_HIGH_P64 : SOpInst<"vmull_high", "rdd", "HPl", OP_MULLHi_P64>;
+
////////////////////////////////////////////////////////////////////////////////
// Extract or insert element from vector
// With additional d, Qd, pl, Qpl types
def REINTERPRET
: NoTestOpInst<"vreinterpret", "dd",
- "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPl", OP_REINT>;
+ "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT>;
////////////////////////////////////////////////////////////////////////////////
case NeonTypeFlags::Int64:
case NeonTypeFlags::Poly64:
return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
+ case NeonTypeFlags::Poly128:
+ // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
+ // There is a lot of i128 and f128 API missing.
+ // so we use v16i8 to represent poly128 and get pattern matched.
+ return llvm::VectorType::get(CGF->Int8Ty, 16);
case NeonTypeFlags::Float32:
return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
case NeonTypeFlags::Float64:
case AArch64::BI__builtin_neon_vcvtd_n_u64_f64:
Int = Intrinsic::aarch64_neon_vcvtd_n_u64_f64;
s = "fcvtzu"; OverloadInt = false; break;
+ case AArch64::BI__builtin_neon_vmull_p64:
+ Int = Intrinsic::aarch64_neon_vmull_p64;
+ s = "vmull"; OverloadInt = false; break;
}
if (!Int)
SmallVector<Value *, 4> Ops;
llvm::Value *Align = 0; // Alignment for load/store
+
+ if (BuiltinID == AArch64::BI__builtin_neon_vldrq_p128) {
+ Value *Op = EmitScalarExpr(E->getArg(0));
+ unsigned addressSpace =
+ cast<llvm::PointerType>(Op->getType())->getAddressSpace();
+ llvm::Type *Ty = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace);
+ Op = Builder.CreateBitCast(Op, Ty);
+ Op = Builder.CreateLoad(Op);
+ Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
+ return Builder.CreateBitCast(Op, Ty);
+ }
+ if (BuiltinID == AArch64::BI__builtin_neon_vstrq_p128) {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ unsigned addressSpace =
+ cast<llvm::PointerType>(Op0->getType())->getAddressSpace();
+ llvm::Type *PTy = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace);
+ Op0 = Builder.CreateBitCast(Op0, PTy);
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ llvm::Type *Ty = llvm::Type::getFP128Ty(getLLVMContext());
+ Op1 = Builder.CreateBitCast(Op1, Ty);
+ return Builder.CreateStore(Op1, Op0);
+ }
for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
if (i == 0) {
switch (BuiltinID) {
case NeonTypeFlags::Int64:
case NeonTypeFlags::Poly64:
return shift ? 63 : (1 << IsQuad) - 1;
+ case NeonTypeFlags::Poly128:
+ return shift ? 127 : (1 << IsQuad) - 1;
case NeonTypeFlags::Float16:
assert(!shift && "cannot shift float types!");
return (4 << IsQuad) - 1;
return IsAArch64 ? Context.UnsignedShortTy : Context.ShortTy;
case NeonTypeFlags::Poly64:
return Context.UnsignedLongLongTy;
+ case NeonTypeFlags::Poly128:
+ break;
case NeonTypeFlags::Float16:
return Context.HalfTy;
case NeonTypeFlags::Float32:
--- /dev/null
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics with poly128
+// FIXME: Currently, poly128_t equals to uint128, which will be spilt into
+// two 64-bit GPR(eg X0, X1). Now moving data from X0, X1 to FPR128 will
+// introduce 2 store and 1 load instructions(store X0, X1 to memory and
+// then load back to Q0). If target has NEON, this is better replaced by
+// FMOV or INS.
+
+#include <arm_neon.h>
+
+void test_vstrq_p128(poly128_t * ptr, poly128_t val) {
+ // CHECK: test_vstrq_p128
+ vstrq_p128(ptr, val);
+ // CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+ // CHECK-NEXT: str {{x[0-9]+}}, [{{x[0-9]+}}]
+}
+
+poly128_t test_vldrq_p128(poly128_t * ptr) {
+ // CHECK: test_vldrq_p128
+ return vldrq_p128(ptr);
+ // CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
+ // CHECK-NEXT: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+}
+
+void test_ld_st_p128(poly128_t * ptr) {
+ // CHECK: test_ld_st_p128
+ vstrq_p128(ptr+1, vldrq_p128(ptr));
+ // CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
+ // CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
+}
+
+poly128_t test_vmull_p64(poly64_t a, poly64_t b) {
+ // CHECK: test_vmull_p64
+ return vmull_p64(a, b);
+ // CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
+}
+
+poly128_t test_vmull_high_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vmull_high_p64
+ return vmull_high_p64(a, b);
+ // CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_s8
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_s8(int8x16_t a) {
+ return vreinterpretq_p128_s8(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_s16
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_s16(int16x8_t a) {
+ return vreinterpretq_p128_s16(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_s32
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_s32(int32x4_t a) {
+ return vreinterpretq_p128_s32(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_s64
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_s64(int64x2_t a) {
+ return vreinterpretq_p128_s64(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_u8
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_u8(uint8x16_t a) {
+ return vreinterpretq_p128_u8(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_u16
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_u16(uint16x8_t a) {
+ return vreinterpretq_p128_u16(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_u32
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_u32(uint32x4_t a) {
+ return vreinterpretq_p128_u32(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_u64
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_u64(uint64x2_t a) {
+ return vreinterpretq_p128_u64(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_f32
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_f32(float32x4_t a) {
+ return vreinterpretq_p128_f32(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_f64
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_f64(float64x2_t a) {
+ return vreinterpretq_p128_f64(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_p8
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_p8(poly8x16_t a) {
+ return vreinterpretq_p128_p8(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_p16
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_p16(poly16x8_t a) {
+ return vreinterpretq_p128_p16(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p128_p64
+// CHECK: ret
+poly128_t test_vreinterpretq_p128_p64(poly64x2_t a) {
+ return vreinterpretq_p128_p64(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_s8_p128
+// CHECK: ret
+int8x16_t test_vreinterpretq_s8_p128(poly128_t a) {
+ return vreinterpretq_s8_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_s16_p128
+// CHECK: ret
+int16x8_t test_vreinterpretq_s16_p128(poly128_t a) {
+ return vreinterpretq_s16_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_s32_p128
+// CHECK: ret
+int32x4_t test_vreinterpretq_s32_p128(poly128_t a) {
+ return vreinterpretq_s32_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_s64_p128
+// CHECK: ret
+int64x2_t test_vreinterpretq_s64_p128(poly128_t a) {
+ return vreinterpretq_s64_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_u8_p128
+// CHECK: ret
+uint8x16_t test_vreinterpretq_u8_p128(poly128_t a) {
+ return vreinterpretq_u8_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_u16_p128
+// CHECK: ret
+uint16x8_t test_vreinterpretq_u16_p128(poly128_t a) {
+ return vreinterpretq_u16_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_u32_p128
+// CHECK: ret
+uint32x4_t test_vreinterpretq_u32_p128(poly128_t a) {
+ return vreinterpretq_u32_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_u64_p128
+// CHECK: ret
+uint64x2_t test_vreinterpretq_u64_p128(poly128_t a) {
+ return vreinterpretq_u64_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_f32_p128
+// CHECK: ret
+float32x4_t test_vreinterpretq_f32_p128(poly128_t a) {
+ return vreinterpretq_f32_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_f64_p128
+// CHECK: ret
+float64x2_t test_vreinterpretq_f64_p128(poly128_t a) {
+ return vreinterpretq_f64_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p8_p128
+// CHECK: ret
+poly8x16_t test_vreinterpretq_p8_p128(poly128_t a) {
+ return vreinterpretq_p8_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p16_p128
+// CHECK: ret
+poly16x8_t test_vreinterpretq_p16_p128(poly128_t a) {
+ return vreinterpretq_p16_p128(a);
+}
+
+// CHECK-LABEL: test_vreinterpretq_p64_p128
+// CHECK: ret
+poly64x2_t test_vreinterpretq_p64_p128(poly128_t a) {
+ return vreinterpretq_p64_p128(a);
+}
+
+
OpMla,
OpMlal,
OpMullHi,
+ OpMullHiP64,
OpMullHiN,
OpMlalHi,
OpMlalHiN,
Poly8,
Poly16,
Poly64,
+ Poly128,
Float16,
Float32,
Float64
OpMap["OP_MLA"] = OpMla;
OpMap["OP_MLAL"] = OpMlal;
OpMap["OP_MULLHi"] = OpMullHi;
+ OpMap["OP_MULLHi_P64"] = OpMullHiP64;
OpMap["OP_MULLHi_N"] = OpMullHiN;
OpMap["OP_MLALHi"] = OpMlalHi;
OpMap["OP_MLALHi_N"] = OpMlalHiN;
case 's':
case 'i':
case 'l':
+ case 'k':
case 'h':
case 'f':
case 'd':
return 'i';
case 'i':
return 'l';
+ case 'l':
+ return 'k';
case 'h':
return 'f';
case 'f':
return 's';
case 'l':
return 'i';
+ case 'k':
+ return 'l';
case 'f':
return 'h';
case 'd':
case 'l':
s += 'i';
break;
+ case 'k':
+ s += 'l';
+ break;
default:
s += ty[i];
break;
break;
s += quad ? "x2" : "x1";
break;
+ case 'k':
+ s += "poly128";
+ break;
case 'h':
s += "float16";
if (scal)
// Based on the modifying character, change the type and width if necessary.
type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
+ usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
+ scal && type != 'f' && type != 'd');
+
// All pointers are void* pointers. Change type to 'v' now.
if (pntr) {
usgn = false;
type = 's';
usgn = true;
}
- usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
- scal && type != 'f' && type != 'd');
if (scal) {
SmallString<128> s;
if (type == 'l') // 64-bit long
s += "LLi";
+ else if (type == 'k') // 128-bit long
+ s = "LLLi";
else
s.push_back(type);
default: break;
}
break;
+ case 'k':
+ assert(poly && "Unrecognized 128 bit integer.");
+ typeCode = "p128";
+ break;
case 'h':
switch (ck) {
case ClassS:
case 's': nElts = 4; break;
case 'i': nElts = 2; break;
case 'l': nElts = 1; break;
+ case 'k': nElts = 1; break;
case 'h': nElts = 4; break;
case 'f': nElts = 2; break;
case 'd':
case OpMullHi:
s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
break;
+ case OpMullHiP64: {
+ std::string Op1 = GetHigh("__a", typestr);
+ std::string Op2 = GetHigh("__b", typestr);
+ s += MangleName("vmull", typestr, ClassS);
+ s += "((poly64_t)" + Op1 + ", (poly64_t)" + Op2 + ");";
+ break;
+ }
case OpMullHiN:
s += MangleName("vmull_n", typestr, ClassS);
s += "(" + GetHigh("__a", typestr) + ", __b);";
case 'l':
ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
break;
+ case 'k':
+ ET = NeonTypeFlags::Poly128;
+ break;
case 'h':
ET = NeonTypeFlags::Float16;
break;
OS << "typedef uint8_t poly8_t;\n";
OS << "typedef uint16_t poly16_t;\n";
OS << "typedef uint64_t poly64_t;\n";
+ OS << "typedef __uint128_t poly128_t;\n";
OS << "#else\n";
OS << "typedef int8_t poly8_t;\n";
OS << "typedef int16_t poly16_t;\n";
// Emit AArch64-specific intrinsics.
OS << "#ifdef __aarch64__\n";
+ emitIntrinsic(OS, Records.getDef("VMULL_P64"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
case 'd':
case 'l':
return (1 << (int)quad) - 1;
+ case 'k':
+ return 0;
default:
PrintFatalError("unhandled type!");
}
case 'd':
case 'l':
return 63;
+ case 'k':
+ return 127;
default:
PrintFatalError("unhandled type!");
}