From: Tim Northover
Date: Thu, 30 Jan 2014 14:47:57 +0000 (+0000)
Subject: ARM & AArch64: fully share NEON implementation of permutation intrinsics
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a8289924edf9753de38a62ecfe93f4c89bcffc0f;p=clang

ARM & AArch64: fully share NEON implementation of permutation intrinsics

As a starting point, this moves the CodeGen for NEON permutation
instructions (vtrn, vzip, vuzp) into a new shared function.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@200471 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index bd0301d741..b03daa85d5 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1754,6 +1754,76 @@ CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
   return std::make_pair(EmitScalarExpr(Addr), Align);
 }
 
+Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
+                                                  const CallExpr *E,
+                                                  SmallVectorImpl<Value *> &Ops,
+                                                  llvm::VectorType *VTy) {
+  switch (BuiltinID) {
+  default: break;
+  case NEON::BI__builtin_neon_vtrn_v:
+  case NEON::BI__builtin_neon_vtrnq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+    Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+    Value *SV = 0;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      SmallVector<Constant*, 16> Indices;
+      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+        Indices.push_back(Builder.getInt32(i+vi));
+        Indices.push_back(Builder.getInt32(i+e+vi));
+      }
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      SV = llvm::ConstantVector::get(Indices);
+      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
+      SV = Builder.CreateStore(SV, Addr);
+    }
+    return SV;
+  }
+  case NEON::BI__builtin_neon_vuzp_v:
+  case NEON::BI__builtin_neon_vuzpq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+    Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+    Value *SV = 0;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      SmallVector<Constant*, 16> Indices;
+      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
+        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
+
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      SV = llvm::ConstantVector::get(Indices);
+      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
+      SV = Builder.CreateStore(SV, Addr);
+    }
+    return SV;
+  }
+  case NEON::BI__builtin_neon_vzip_v:
+  case NEON::BI__builtin_neon_vzipq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+    Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+    Value *SV = 0;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      SmallVector<Constant*, 16> Indices;
+      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
+        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
+      }
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      SV = llvm::ConstantVector::get(Indices);
+      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
+      SV = Builder.CreateStore(SV, Addr);
+    }
+    return SV;
+  }
+  }
+
+  return 0;
+}
+
 static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
                                            unsigned BuiltinID,
                                            const CallExpr *E) {
@@ -2981,6 +3051,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   if (!Ty)
     return 0;
 
+  // Many NEON builtins have identical semantics and uses in ARM and
+  // AArch64. Emit these in a single function.
+  if (Value *Result = EmitCommonNeonBuiltinExpr(BuiltinID, E, Ops, VTy))
+    return Result;
+
   unsigned Int;
   switch (BuiltinID) {
   default:
@@ -2989,18 +3064,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   // AArch64 builtins mapping to legacy ARM v7 builtins.
   // FIXME: the mapped builtins listed correspond to what has been tested
   // in aarch64-neon-intrinsics.c so far.
-  case NEON::BI__builtin_neon_vuzp_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vuzp_v, E);
-  case NEON::BI__builtin_neon_vuzpq_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vuzpq_v, E);
-  case NEON::BI__builtin_neon_vzip_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vzip_v, E);
-  case NEON::BI__builtin_neon_vzipq_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vzipq_v, E);
-  case NEON::BI__builtin_neon_vtrn_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vtrn_v, E);
-  case NEON::BI__builtin_neon_vtrnq_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vtrnq_v, E);
   case NEON::BI__builtin_neon_vext_v:
     return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vext_v, E);
   case NEON::BI__builtin_neon_vextq_v:
@@ -4213,6 +4276,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   if (!Ty)
     return 0;
 
+  // Many NEON builtins have identical semantics and uses in ARM and
+  // AArch64. Emit these in a single function.
+  if (Value *Result = EmitCommonNeonBuiltinExpr(BuiltinID, E, Ops, VTy))
+    return Result;
+
   unsigned Int;
   switch (BuiltinID) {
   default: return 0;
@@ -4869,65 +4937,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                 ConstantAggregateZero::get(Ty));
     return Builder.CreateSExt(Ops[0], Ty, "vtst");
   }
-  case NEON::BI__builtin_neon_vtrn_v:
-  case NEON::BI__builtin_neon_vtrnq_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
-
-    for (unsigned vi = 0; vi != 2; ++vi) {
-      SmallVector<Constant*, 16> Indices;
-      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
-        Indices.push_back(Builder.getInt32(i+vi));
-        Indices.push_back(Builder.getInt32(i+e+vi));
-      }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
-      SV = llvm::ConstantVector::get(Indices);
-      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
-      SV = Builder.CreateStore(SV, Addr);
-    }
-    return SV;
-  }
-  case NEON::BI__builtin_neon_vuzp_v:
-  case NEON::BI__builtin_neon_vuzpq_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
-
-    for (unsigned vi = 0; vi != 2; ++vi) {
-      SmallVector<Constant*, 16> Indices;
-      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
-        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
-
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
-      SV = llvm::ConstantVector::get(Indices);
-      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
-      SV = Builder.CreateStore(SV, Addr);
-    }
-    return SV;
-  }
-  case NEON::BI__builtin_neon_vzip_v:
-  case NEON::BI__builtin_neon_vzipq_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
-
-    for (unsigned vi = 0; vi != 2; ++vi) {
-      SmallVector<Constant*, 16> Indices;
-      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
-        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
-        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
-      }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
-      SV = llvm::ConstantVector::get(Indices);
-      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
-      SV = Builder.CreateStore(SV, Addr);
-    }
-    return SV;
-  }
   }
 }
 
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index ff3746ad18..ac583e4d1c 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2177,6 +2177,9 @@ public:
   llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty);
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+                                         SmallVectorImpl<llvm::Value *> &Ops,
+                                         llvm::VectorType *VTy);
   llvm::Value *EmitNeonCall(llvm::Function *F, SmallVectorImpl<llvm::Value*> &O,
                             const char *name,
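
Note (not part of the patch): for a concrete picture of what the shared emitter produces, the standalone sketch below re-runs the same three index loops as EmitCommonNeonBuiltinExpr outside of CodeGen and prints the pair of shuffle masks each intrinsic stores. The 4-lane width and all names in the sketch are illustrative assumptions, not code from this commit.

  // Sketch only: mirrors the index generation in EmitCommonNeonBuiltinExpr
  // for a hypothetical 4-element vector, printing both result masks.
  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned e = 4; // stands in for VTy->getNumElements()
    for (unsigned vi = 0; vi != 2; ++vi) {
      std::vector<unsigned> trn, uzp, zip;
      for (unsigned i = 0; i != e; i += 2) {   // vtrn: swap across element pairs
        trn.push_back(i + vi);
        trn.push_back(i + e + vi);
      }
      for (unsigned i = 0; i != e; ++i)        // vuzp: de-interleave even/odd lanes
        uzp.push_back(2 * i + vi);
      for (unsigned i = 0; i != e; i += 2) {   // vzip: interleave low/high halves
        zip.push_back((i + vi * e) >> 1);
        zip.push_back(((i + vi * e) >> 1) + e);
      }
      printf("result %u: vtrn <", vi);
      for (unsigned v : trn) printf(" %u", v);
      printf(" >  vuzp <");
      for (unsigned v : uzp) printf(" %u", v);
      printf(" >  vzip <");
      for (unsigned v : zip) printf(" %u", v);
      printf(" >\n");
    }
    return 0;
  }

For a 4-element vector this prints vtrn masks <0 4 2 6> and <1 5 3 7>, vuzp masks <0 2 4 6> and <1 3 5 7>, and vzip masks <0 4 1 5> and <2 6 3 7>; the three permutation intrinsics differ only in how the indices are generated, which is what makes a single shared emission path for ARM and AArch64 possible.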