From 1c2a88cfaeb11227d3a6bf7204207e0c8cf6de6f Mon Sep 17 00:00:00 2001
From: Nate Begeman
Date: Wed, 9 Jun 2010 01:10:23 +0000
Subject: [PATCH] Implement transpose/zip/unzip & table lookup.

Test out some basic constant-checking.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@105667 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/BuiltinsARM.def |  2 -
 lib/CodeGen/CGBuiltin.cpp           | 85 +++++++++++++++++++++++++++--
 lib/Headers/arm_neon.td             |  6 +-
 lib/Sema/SemaChecking.cpp           | 23 +++++++-
 4 files changed, 107 insertions(+), 9 deletions(-)

diff --git a/include/clang/Basic/BuiltinsARM.def b/include/clang/Basic/BuiltinsARM.def
index d219414f82..b168ac75a5 100644
--- a/include/clang/Basic/BuiltinsARM.def
+++ b/include/clang/Basic/BuiltinsARM.def
@@ -64,7 +64,6 @@ BUILTIN(__builtin_neon_vcvt_u32_v, "V2iV8ci", "n")
 BUILTIN(__builtin_neon_vcvtq_u32_v, "V4iV16ci", "n")
 BUILTIN(__builtin_neon_vext_v, "V8cV8cV8cii", "n")
 BUILTIN(__builtin_neon_vextq_v, "V16cV16cV16cii", "n")
-BUILTIN(__builtin_neon_vget_high_v, "V8cV16ci", "n")
 BUILTIN(__builtin_neon_vget_lane_i8, "UcV8ci", "n")
 BUILTIN(__builtin_neon_vget_lane_i16, "UsV8ci", "n")
 BUILTIN(__builtin_neon_vget_lane_i32, "UiV8ci", "n")
@@ -75,7 +74,6 @@ BUILTIN(__builtin_neon_vgetq_lane_i32, "UiV16ci", "n")
 BUILTIN(__builtin_neon_vgetq_lane_f32, "fV16ci", "n")
 BUILTIN(__builtin_neon_vget_lane_i64, "ULLiV8ci", "n")
 BUILTIN(__builtin_neon_vgetq_lane_i64, "ULLiV16ci", "n")
-BUILTIN(__builtin_neon_vget_low_v, "V8cV16ci", "n")
 BUILTIN(__builtin_neon_vhadd_v, "V8cV8cV8ci", "n")
 BUILTIN(__builtin_neon_vhaddq_v, "V16cV16cV16ci", "n")
 BUILTIN(__builtin_neon_vhsub_v, "V8cV8cV8ci", "n")
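Note (not part of the patch): dropping __builtin_neon_vget_high_v and
__builtin_neon_vget_low_v loses no functionality. The arm_neon.td hunk
further down reroutes vget_high/vget_low through the new OP_HI/OP_LO ops,
which expand to plain half-vector lane selection instead of a dedicated
builtin. A minimal standalone C++ sketch of that selection, using
hypothetical helper names, for a 128-bit vector of 8 x u16:

    // Sketch only: what vget_low/vget_high select.  Not Clang code.
    #include <array>
    #include <cstdio>

    using V8u16 = std::array<unsigned short, 8>;
    using V4u16 = std::array<unsigned short, 4>;

    static V4u16 vget_low_sketch(const V8u16 &v) {
      return {v[0], v[1], v[2], v[3]};   // low half: lanes 0..3
    }
    static V4u16 vget_high_sketch(const V8u16 &v) {
      return {v[4], v[5], v[6], v[7]};   // high half: lanes 4..7
    }

    int main() {
      V8u16 v = {0, 1, 2, 3, 4, 5, 6, 7};
      V4u16 hi = vget_high_sketch(v);
      std::printf("%d %d %d %d\n", hi[0], hi[1], hi[2], hi[3]);  // 4 5 6 7
      return 0;
    }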
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 398e63c6e2..777be4da86 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1051,10 +1051,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   case ARM::BI__builtin_neon_vextq_v: {
     ConstantInt *C = dyn_cast<ConstantInt>(Ops[2]);
     int CV = C->getSExtValue();
-
-    SmallVector<Constant*, 16> Indices;
-    const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext);
+
+    SmallVector<Constant*, 16> Indices;
     for (unsigned i = 0, e = cast<llvm::VectorType>(Ty)->getNumElements(); i != e; ++i)
       Indices.push_back(ConstantInt::get(I32Ty, i+CV));
@@ -1062,7 +1061,85 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
-    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV);
+    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
+  }
+  case ARM::BI__builtin_neon_vtbl1_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
+                        Ops, "vtbl1");
+  case ARM::BI__builtin_neon_vtbl2_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
+                        Ops, "vtbl2");
+  case ARM::BI__builtin_neon_vtbl3_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
+                        Ops, "vtbl3");
+  case ARM::BI__builtin_neon_vtbl4_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
+                        Ops, "vtbl4");
+  case ARM::BI__builtin_neon_vtbx1_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
+                        Ops, "vtbx1");
+  case ARM::BI__builtin_neon_vtbx2_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
+                        Ops, "vtbx2");
+  case ARM::BI__builtin_neon_vtbx3_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
+                        Ops, "vtbx3");
+  case ARM::BI__builtin_neon_vtbx4_v:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
+                        Ops, "vtbx4");
+  case ARM::BI__builtin_neon_vtst_v:
+  case ARM::BI__builtin_neon_vtstq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
+    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
+                                ConstantAggregateZero::get(Ty));
+    return Builder.CreateSExt(Ops[0], Ty, "vtst");
+  }
+  // FIXME: transpose/zip/unzip don't currently match patterns for
+  // the non-q variants, but emitting 2 shufflevectors seems like a hack.
+  case ARM::BI__builtin_neon_vtrn_v:
+  case ARM::BI__builtin_neon_vtrnq_v: {
+    const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext);
+    SmallVector<Constant*, 16> Indices;
+    unsigned nElts = cast<llvm::VectorType>(Ty)->getNumElements();
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      for (unsigned i = 0; i != nElts; i += 2) {
+        Indices.push_back(ConstantInt::get(I32Ty, i+vi));
+        Indices.push_back(ConstantInt::get(I32Ty, i+nElts+vi));
+      }
+    }
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+    Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vtrn");
+  }
+  case ARM::BI__builtin_neon_vuzp_v:
+  case ARM::BI__builtin_neon_vuzpq_v: {
+    const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext);
+    SmallVector<Constant*, 16> Indices;
+    unsigned nElts = cast<llvm::VectorType>(Ty)->getNumElements();
+    for (unsigned vi = 0; vi != 2; ++vi)
+      for (unsigned i = 0; i != nElts; ++i)
+        Indices.push_back(ConstantInt::get(I32Ty, 2*i+vi));
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+    Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vuzp");
+  }
+  case ARM::BI__builtin_neon_vzip_v:
+  case ARM::BI__builtin_neon_vzipq_v: {
+    const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext);
+    SmallVector<Constant*, 16> Indices;
+    unsigned nElts = cast<llvm::VectorType>(Ty)->getNumElements();
+    for (unsigned i = 0; i != nElts; ++i) {
+      Indices.push_back(ConstantInt::get(I32Ty, i));
+      Indices.push_back(ConstantInt::get(I32Ty, i+nElts));
+    }
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+    Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vzip");
   }
   }
 }
diff --git a/lib/Headers/arm_neon.td b/lib/Headers/arm_neon.td
index c8c8ecd3db..42b27ead6c 100644
--- a/lib/Headers/arm_neon.td
+++ b/lib/Headers/arm_neon.td
@@ -33,6 +33,8 @@ def OP_XOR : Op;
 def OP_ANDN : Op;
 def OP_ORN : Op;
 def OP_CAST : Op;
+def OP_HI : Op;
+def OP_LO : Op;
 def OP_CONC : Op;
 def OP_DUP : Op;
 
@@ -234,8 +236,8 @@ def VCOMBINE : Inst<"kdd", "csilhfUcUsUiUlPcPs", OP_CONC>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.21 Splitting vectors
-def VGET_HIGH : WInst<"dk", "csilhfUcUsUiUlPcPs">;
-def VGET_LOW : WInst<"dk", "csilhfUcUsUiUlPcPs">;
+def VGET_HIGH : Inst<"dk", "csilhfUcUsUiUlPcPs", OP_HI>;
+def VGET_LOW : Inst<"dk", "csilhfUcUsUiUlPcPs", OP_LO>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.22 Converting vectors
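Note (not part of the patch): per the FIXME, the vtrn/vuzp/vzip cases
above each emit a single shufflevector whose mask concatenates both
result vectors. A standalone C++ sketch that reproduces the same mask
arithmetic with plain integers, printed for nElts == 4:

    // Sketch only: the shuffle masks the three loops above construct.
    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned nElts = 4;
      std::vector<unsigned> trn, uzp, zip;

      // vtrn: pair up even lanes, then odd lanes, of the two inputs.
      for (unsigned vi = 0; vi != 2; ++vi)
        for (unsigned i = 0; i != nElts; i += 2) {
          trn.push_back(i + vi);
          trn.push_back(i + nElts + vi);
        }
      // vuzp: all even lanes of both inputs, then all odd lanes.
      for (unsigned vi = 0; vi != 2; ++vi)
        for (unsigned i = 0; i != nElts; ++i)
          uzp.push_back(2 * i + vi);
      // vzip: alternate lanes of the first and second input.
      for (unsigned i = 0; i != nElts; ++i) {
        zip.push_back(i);
        zip.push_back(i + nElts);
      }

      // Prints: trn = 0 4 2 6 1 5 3 7
      //         uzp = 0 2 4 6 1 3 5 7
      //         zip = 0 4 1 5 2 6 3 7
      for (unsigned v : trn) std::printf("%u ", v); std::printf("\n");
      for (unsigned v : uzp) std::printf("%u ", v); std::printf("\n");
      for (unsigned v : zip) std::printf("%u ", v); std::printf("\n");
      return 0;
    }

Indices below nElts select from the first operand and indices at or
above nElts from the second, so each mask is exactly the lane shuffle
the corresponding VTRN/VUZP/VZIP instruction performs, widened to
cover both outputs at once.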
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 76407ef7a7..6e54dab113 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -241,7 +241,28 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
 }
 
 bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
-  // TODO: verify NEON intrinsic constant args.
+  llvm::APSInt Result;
+
+  switch (BuiltinID) {
+  case ARM::BI__builtin_neon_vget_lane_i8:
+  case ARM::BI__builtin_neon_vget_lane_i16:
+  case ARM::BI__builtin_neon_vget_lane_i32:
+  case ARM::BI__builtin_neon_vget_lane_f32:
+  case ARM::BI__builtin_neon_vget_lane_i64:
+  case ARM::BI__builtin_neon_vgetq_lane_i8:
+  case ARM::BI__builtin_neon_vgetq_lane_i16:
+  case ARM::BI__builtin_neon_vgetq_lane_i32:
+  case ARM::BI__builtin_neon_vgetq_lane_f32:
+  case ARM::BI__builtin_neon_vgetq_lane_i64:
+    // Check constant-ness first.
+    if (SemaBuiltinConstantArg(TheCall, 1, Result))
+      return true;
+    break;
+  }
+
+  // Now, range check values.
+  //unsigned lower = 0, upper = 0;
+  return false;
 }
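Note (not part of the patch): the commented-out "range check values"
tail above marks where lane bounds checking would land. A standalone
sketch of that check, assuming a hypothetical helper rather than any
existing Sema API: the lane constant for vget_lane/vgetq_lane must fall
in [0, nElts-1], where nElts depends on the overload (e.g. 8 for
vget_lane_i8 on a 64-bit vector of i8, 16 for vgetq_lane_i8).

    // Sketch only: checkLaneInRange is a hypothetical helper, not a
    // Clang/Sema API.  Sema convention is to return true on error.
    #include <cstdio>

    static bool checkLaneInRange(long long Lane, unsigned NumElts) {
      return Lane < 0 || Lane >= (long long)NumElts;
    }

    int main() {
      // vget_lane_i8 reads from an 8 x i8 vector: valid lanes are 0..7.
      std::printf("%d\n", checkLaneInRange(7, 8));   // 0: accepted
      std::printf("%d\n", checkLaneInRange(8, 8));   // 1: rejected
      return 0;
    }

-- 
2.50.1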