From df98e1d1da5ab1ca7c325378fc1c2eaa90a6476d Mon Sep 17 00:00:00 2001 From: Nate Begeman Date: Wed, 9 Jun 2010 18:04:15 +0000 Subject: [PATCH] Implement codegen for hadd, hsub, max, min, mlal, movl, movn, padal, mov_n Make note about how to handle the dozen or so multiply by scalar ops. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@105734 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsARM.def | 62 ++++++++++++----------------- lib/CodeGen/CGBuiltin.cpp | 36 ++++++++++++++++- lib/Headers/arm_neon.td | 2 +- 3 files changed, 62 insertions(+), 38 deletions(-) diff --git a/include/clang/Basic/BuiltinsARM.def b/include/clang/Basic/BuiltinsARM.def index 338d0c1d0c..26c5bec635 100644 --- a/include/clang/Basic/BuiltinsARM.def +++ b/include/clang/Basic/BuiltinsARM.def @@ -105,10 +105,10 @@ BUILTIN(__builtin_neon_vmin_v, "V8cV8cV8ci", "n") BUILTIN(__builtin_neon_vminq_v, "V16cV16cV16ci", "n") BUILTIN(__builtin_neon_vmlal_v, "V16cV16cV8cV8ci", "n") BUILTIN(__builtin_neon_vmlal_lane_v, "V16cV16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vmlal_n_s16, "V16cV16cV8cs", "n") -BUILTIN(__builtin_neon_vmlal_n_s32, "V16cV16cV8ci", "n") -BUILTIN(__builtin_neon_vmlal_n_u16, "V16cV16cV8cUs", "n") -BUILTIN(__builtin_neon_vmlal_n_u32, "V16cV16cV8cUi", "n") +BUILTIN(__builtin_neon_vmlal_n_s16, "V4iV4iV4ss", "n") +BUILTIN(__builtin_neon_vmlal_n_s32, "V2LLiV2LLiV2ii", "n") +BUILTIN(__builtin_neon_vmlal_n_u16, "V4iV4iV4sUs", "n") +BUILTIN(__builtin_neon_vmlal_n_u32, "V2LLiV2LLiV2iUi", "n") BUILTIN(__builtin_neon_vmla_lane_v, "V8cV8cV8cV8cii", "n") BUILTIN(__builtin_neon_vmlaq_lane_v, "V16cV16cV16cV16cii", "n") BUILTIN(__builtin_neon_vmla_n_i16, "V4sV4sV4sUs", "n") @@ -119,10 +119,10 @@ BUILTIN(__builtin_neon_vmlaq_n_i32, "V4iV4iV4iUi", "n") BUILTIN(__builtin_neon_vmlaq_n_f32, "V4fV4fV4ff", "n") BUILTIN(__builtin_neon_vmlsl_v, "V16cV16cV8cV8ci", "n") BUILTIN(__builtin_neon_vmlsl_lane_v, "V16cV16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vmlsl_n_s16, "V16cV16cV8cs", "n") -BUILTIN(__builtin_neon_vmlsl_n_s32, "V16cV16cV8ci", "n") -BUILTIN(__builtin_neon_vmlsl_n_u16, "V16cV16cV8cUs", "n") -BUILTIN(__builtin_neon_vmlsl_n_u32, "V16cV16cV8cUi", "n") +BUILTIN(__builtin_neon_vmlsl_n_s16, "V4iV4iV4ss", "n") +BUILTIN(__builtin_neon_vmlsl_n_s32, "V2LLiV2LLiV2ii", "n") +BUILTIN(__builtin_neon_vmlsl_n_u16, "V4iV4iV4sUs", "n") +BUILTIN(__builtin_neon_vmlsl_n_u32, "V2LLiV2LLiV2iUi", "n") BUILTIN(__builtin_neon_vmls_lane_v, "V8cV8cV8cV8cii", "n") BUILTIN(__builtin_neon_vmlsq_lane_v, "V16cV16cV16cV16cii", "n") BUILTIN(__builtin_neon_vmls_n_i16, "V4sV4sV4sUs", "n") @@ -133,22 +133,12 @@ BUILTIN(__builtin_neon_vmlsq_n_i32, "V4iV4iV4iUi", "n") BUILTIN(__builtin_neon_vmlsq_n_f32, "V4fV4fV4ff", "n") BUILTIN(__builtin_neon_vmovl_v, "V16cV8ci", "n") BUILTIN(__builtin_neon_vmovn_v, "V8cV16ci", "n") -BUILTIN(__builtin_neon_vmov_n_i8, "V8cUc", "n") -BUILTIN(__builtin_neon_vmov_n_i16, "V4sUs", "n") -BUILTIN(__builtin_neon_vmov_n_i32, "V2iUi", "n") -BUILTIN(__builtin_neon_vmov_n_f32, "V2ff", "n") -BUILTIN(__builtin_neon_vmovq_n_i8, "V16cUc", "n") -BUILTIN(__builtin_neon_vmovq_n_i16, "V8sUs", "n") -BUILTIN(__builtin_neon_vmovq_n_i32, "V4iUi", "n") -BUILTIN(__builtin_neon_vmovq_n_f32, "V4ff", "n") -BUILTIN(__builtin_neon_vmov_n_i64, "V1LLiULLi", "n") -BUILTIN(__builtin_neon_vmovq_n_i64, "V2LLiULLi", "n") BUILTIN(__builtin_neon_vmull_v, "V16cV8cV8ci", "n") BUILTIN(__builtin_neon_vmull_lane_v, "V16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vmull_n_s16, "V16cV8cs", "n") -BUILTIN(__builtin_neon_vmull_n_s32, "V16cV8ci", "n") -BUILTIN(__builtin_neon_vmull_n_u16, "V16cV8cUs", "n") -BUILTIN(__builtin_neon_vmull_n_u32, "V16cV8cUi", "n") +BUILTIN(__builtin_neon_vmull_n_s16, "V4iV4ss", "n") +BUILTIN(__builtin_neon_vmull_n_s32, "V2LLiV2ii", "n") +BUILTIN(__builtin_neon_vmull_n_u16, "V4iV4sUs", "n") +BUILTIN(__builtin_neon_vmull_n_u32, "V2LLiV2iUi", "n") BUILTIN(__builtin_neon_vmul_n_i16, "V4sV4sUs", "n") BUILTIN(__builtin_neon_vmul_n_i32, "V2iV2iUi", "n") BUILTIN(__builtin_neon_vmul_n_f32, "V2fV2ff", "n") @@ -168,24 +158,24 @@ BUILTIN(__builtin_neon_vqadd_v, "V8cV8cV8ci", "n") BUILTIN(__builtin_neon_vqaddq_v, "V16cV16cV16ci", "n") BUILTIN(__builtin_neon_vqdmlal_v, "V16cV16cV8cV8ci", "n") BUILTIN(__builtin_neon_vqdmlal_lane_v, "V16cV16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vqdmlal_n_s16, "V16cV16cV8cs", "n") -BUILTIN(__builtin_neon_vqdmlal_n_s32, "V16cV16cV8ci", "n") +BUILTIN(__builtin_neon_vqdmlal_n_s16, "V4iV4iV4ss", "n") +BUILTIN(__builtin_neon_vqdmlal_n_s32, "V2LLiV2LLiV2ii", "n") BUILTIN(__builtin_neon_vqdmlsl_v, "V16cV16cV8cV8ci", "n") BUILTIN(__builtin_neon_vqdmlsl_lane_v, "V16cV16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vqdmlsl_n_s16, "V16cV16cV8cs", "n") -BUILTIN(__builtin_neon_vqdmlsl_n_s32, "V16cV16cV8ci", "n") +BUILTIN(__builtin_neon_vqdmlsl_n_s16, "V4iV4iV4ss", "n") +BUILTIN(__builtin_neon_vqdmlsl_n_s32, "V2LLiV2LLiV2ii", "n") BUILTIN(__builtin_neon_vqdmulh_v, "V8cV8cV8ci", "n") BUILTIN(__builtin_neon_vqdmulhq_v, "V16cV16cV16ci", "n") BUILTIN(__builtin_neon_vqdmulh_lane_v, "V8cV8cV8cii", "n") BUILTIN(__builtin_neon_vqdmulhq_lane_v, "V16cV16cV16cii", "n") -BUILTIN(__builtin_neon_vqdmulh_n_s16, "V8cV8cs", "n") -BUILTIN(__builtin_neon_vqdmulh_n_s32, "V8cV8ci", "n") -BUILTIN(__builtin_neon_vqdmulhq_n_s16, "V16cV16cs", "n") -BUILTIN(__builtin_neon_vqdmulhq_n_s32, "V16cV16ci", "n") +BUILTIN(__builtin_neon_vqdmulh_n_s16, "V4sV4ss", "n") +BUILTIN(__builtin_neon_vqdmulh_n_s32, "V2iV2ii", "n") +BUILTIN(__builtin_neon_vqdmulhq_n_s16, "V8sV8ss", "n") +BUILTIN(__builtin_neon_vqdmulhq_n_s32, "V4iV4ii", "n") BUILTIN(__builtin_neon_vqdmull_v, "V16cV8cV8ci", "n") BUILTIN(__builtin_neon_vqdmull_lane_v, "V16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vqdmull_n_s16, "V16cV8cs", "n") -BUILTIN(__builtin_neon_vqdmull_n_s32, "V16cV8ci", "n") +BUILTIN(__builtin_neon_vqdmull_n_s16, "V4iV4ss", "n") +BUILTIN(__builtin_neon_vqdmull_n_s32, "V2LLiV2ii", "n") BUILTIN(__builtin_neon_vqmovn_v, "V8cV16ci", "n") BUILTIN(__builtin_neon_vqmovun_v, "V8cV16ci", "n") BUILTIN(__builtin_neon_vqneg_v, "V8cV8ci", "n") @@ -194,10 +184,10 @@ BUILTIN(__builtin_neon_vqrdmulh_v, "V8cV8cV8ci", "n") BUILTIN(__builtin_neon_vqrdmulhq_v, "V16cV16cV16ci", "n") BUILTIN(__builtin_neon_vqrdmulh_lane_v, "V8cV8cV8cii", "n") BUILTIN(__builtin_neon_vqrdmulhq_lane_v, "V16cV16cV16cii", "n") -BUILTIN(__builtin_neon_vqrdmulh_n_s16, "V8cV8cs", "n") -BUILTIN(__builtin_neon_vqrdmulh_n_s32, "V8cV8ci", "n") -BUILTIN(__builtin_neon_vqrdmulhq_n_s16, "V16cV16cs", "n") -BUILTIN(__builtin_neon_vqrdmulhq_n_s32, "V16cV16ci", "n") +BUILTIN(__builtin_neon_vqrdmulh_n_s16, "V4sV4ss", "n") +BUILTIN(__builtin_neon_vqrdmulh_n_s32, "V2iV2ii", "n") +BUILTIN(__builtin_neon_vqrdmulhq_n_s16, "V8sV8ss", "n") +BUILTIN(__builtin_neon_vqrdmulhq_n_s32, "V4iV4ii", "n") BUILTIN(__builtin_neon_vqrshl_v, "V8cV8cV8ci", "n") BUILTIN(__builtin_neon_vqrshlq_v, "V16cV16cV16ci", "n") BUILTIN(__builtin_neon_vqrshrn_n_v, "V8cV16cii", "n") diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 3ef3e17198..3e8fec5b35 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1068,7 +1068,41 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case ARM::BI__builtin_neon_vgetq_lane_i32: case ARM::BI__builtin_neon_vgetq_lane_i64: case ARM::BI__builtin_neon_vgetq_lane_f32: - return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1))); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + case ARM::BI__builtin_neon_vhadd_v: + case ARM::BI__builtin_neon_vhaddq_v: + Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds; + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhadd"); + case ARM::BI__builtin_neon_vhsub_v: + case ARM::BI__builtin_neon_vhsubq_v: + Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs; + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhsub"); + // FIXME: vld* + case ARM::BI__builtin_neon_vmax_v: + case ARM::BI__builtin_neon_vmaxq_v: + Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs; + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmax"); + case ARM::BI__builtin_neon_vmin_v: + case ARM::BI__builtin_neon_vminq_v: + Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins; + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin"); + // FIXME: vmlal_lane -> splat, drop imm + case ARM::BI__builtin_neon_vmlal_v: + Int = usgn ? Intrinsic::arm_neon_vmlalu : Intrinsic::arm_neon_vmlals; + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal"); + // FIXME: vmlal_n, vmla_n, vmlsl_n, vmls_n, vmull_n, vmul_n, + // vqdmlal_n, vqdmlsl_n, vqdmulh_n, vqdmull_n, vqrdmulh_n -> splat,-_n + case ARM::BI__builtin_neon_vmovl_v: + Int = usgn ? Intrinsic::arm_neon_vmovlu : Intrinsic::arm_neon_vmovls; + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmovl"); + case ARM::BI__builtin_neon_vmovn_v: + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmovn, &Ty, 1), + Ops, "vmovn"); + case ARM::BI__builtin_neon_vpadal_v: + case ARM::BI__builtin_neon_vpadalq_v: + Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals; + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpadal"); case ARM::BI__builtin_neon_vtbl1_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), Ops, "vtbl1"); diff --git a/lib/Headers/arm_neon.td b/lib/Headers/arm_neon.td index 42b27ead6c..4dbecb66b9 100644 --- a/lib/Headers/arm_neon.td +++ b/lib/Headers/arm_neon.td @@ -228,7 +228,7 @@ def VCREATE: Inst<"dl", "csihfUcUsUiUlPcPsl", OP_CAST>; //////////////////////////////////////////////////////////////////////////////// // E.3.19 Set all lanes to same value def VDUP_N : Inst<"ds", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP>; -def VMOV_N : IInst<"ds", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; +def VMOV_N : Inst<"ds", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP>; //////////////////////////////////////////////////////////////////////////////// // E.3.20 Combining vectors -- 2.40.0