From: Tim Northover Date: Wed, 28 Aug 2013 09:46:37 +0000 (+0000) Subject: ARM: Emit normal IR for vaddhn/vsubhn NEON intrinsics X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6bf1e8eadd36f077cf5403d23c664129d98eaf79;p=clang ARM: Emit normal IR for vaddhn/vsubhn NEON intrinsics These operations "vector add high-half narrow" actually correspond to the sequence: %sum = add <4 x i32> %lhs, %rhs %high = lshr <4 x i32> %sum, %res = trunc <4 x i32> %high to <4 x i16> Now that LLVM can spot this, Clang should emit the corresponding LLVM IR. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@189463 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index c9a2fcbf46..59b25d689f 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -2202,9 +2202,24 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case ARM::BI__builtin_neon_vabsq_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty), Ops, "vabs"); - case ARM::BI__builtin_neon_vaddhn_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty), - Ops, "vaddhn"); + case ARM::BI__builtin_neon_vaddhn_v: { + llvm::VectorType *SrcTy = + llvm::VectorType::getExtendedElementVectorType(VTy); + + // %sum = add <4 x i32> %lhs, %rhs + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); + Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); + + // %high = lshr <4 x i32> %sum, + Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), + SrcTy->getScalarSizeInBits() / 2); + ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); + Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); + + // %res = trunc <4 x i32> %high to <4 x i16> + return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); + } case ARM::BI__builtin_neon_vcale_v: std::swap(Ops[0], Ops[1]); case ARM::BI__builtin_neon_vcage_v: { @@ -2782,9 +2797,24 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ops.push_back(Align); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty), Ops, ""); - case ARM::BI__builtin_neon_vsubhn_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty), - Ops, "vsubhn"); + case ARM::BI__builtin_neon_vsubhn_v: { + llvm::VectorType *SrcTy = + llvm::VectorType::getExtendedElementVectorType(VTy); + + // %sum = add <4 x i32> %lhs, %rhs + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); + Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); + + // %high = lshr <4 x i32> %sum, + Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), + SrcTy->getScalarSizeInBits() / 2); + ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); + Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); + + // %res = trunc <4 x i32> %high to <4 x i16> + return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); + } case ARM::BI__builtin_neon_vtbl1_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), Ops, "vtbl1");