From: Jin-Gu Kang Date: Tue, 4 Apr 2017 16:40:25 +0000 (+0000) Subject: Preserve vec3 type. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a463701ed2c3612812c34c1057a6dbdf1fdb8224;p=clang Preserve vec3 type. Summary: Preserve vec3 type with CodeGen option. Reviewers: Anastasia, bruno Reviewed By: Anastasia Subscribers: bruno, ahatanak, cfe-commits Differential Revision: https://reviews.llvm.org/D30810 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@299445 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td index a683efecee..d35844c1f1 100644 --- a/include/clang/Driver/CC1Options.td +++ b/include/clang/Driver/CC1Options.td @@ -658,6 +658,8 @@ def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">, HelpText<"Set default MS calling convention">; def finclude_default_header : Flag<["-"], "finclude-default-header">, HelpText<"Include the default header file for OpenCL">; +def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">, + HelpText<"Preserve 3-component vector type">; // FIXME: Remove these entirely once functionality/tests have been excised. def fobjc_gc_only : Flag<["-"], "fobjc-gc-only">, Group, diff --git a/include/clang/Frontend/CodeGenOptions.def b/include/clang/Frontend/CodeGenOptions.def index 751f5e2944..9565b22e70 100644 --- a/include/clang/Frontend/CodeGenOptions.def +++ b/include/clang/Frontend/CodeGenOptions.def @@ -263,6 +263,9 @@ CODEGENOPT(StrictReturn, 1, 1) /// Whether emit extra debug info for sample pgo profile collection. CODEGENOPT(DebugInfoForProfiling, 1, 0) +/// Whether 3-component vector type is preserved. +CODEGENOPT(PreserveVec3Type, 1, 0) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 1a95eb1045..38eb345d91 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -1369,26 +1369,28 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, QualType TBAABaseType, uint64_t TBAAOffset, bool isNontemporal) { - // For better performance, handle vector loads differently. - if (Ty->isVectorType()) { - const llvm::Type *EltTy = Addr.getElementType(); - - const auto *VTy = cast(EltTy); - - // Handle vectors of size 3 like size 4 for better performance. - if (VTy->getNumElements() == 3) { - - // Bitcast to vec4 type. - llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(), - 4); - Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4"); - // Now load value. - llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); - - // Shuffle vector to get vec3. - V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty), - {0, 1, 2}, "extractVec"); - return EmitFromMemory(V, Ty); + if (!CGM.getCodeGenOpts().PreserveVec3Type) { + // For better performance, handle vector loads differently. + if (Ty->isVectorType()) { + const llvm::Type *EltTy = Addr.getElementType(); + + const auto *VTy = cast(EltTy); + + // Handle vectors of size 3 like size 4 for better performance. + if (VTy->getNumElements() == 3) { + + // Bitcast to vec4 type. + llvm::VectorType *vec4Ty = + llvm::VectorType::get(VTy->getElementType(), 4); + Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4"); + // Now load value. + llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); + + // Shuffle vector to get vec3. + V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty), + {0, 1, 2}, "extractVec"); + return EmitFromMemory(V, Ty); + } } } @@ -1456,24 +1458,25 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, uint64_t TBAAOffset, bool isNontemporal) { - // Handle vectors differently to get better performance. - if (Ty->isVectorType()) { - llvm::Type *SrcTy = Value->getType(); - auto *VecTy = cast(SrcTy); - // Handle vec3 special. - if (VecTy->getNumElements() == 3) { - // Our source is a vec3, do a shuffle vector to make it a vec4. - llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), - Builder.getInt32(2), - llvm::UndefValue::get(Builder.getInt32Ty())}; - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); - Value = Builder.CreateShuffleVector(Value, - llvm::UndefValue::get(VecTy), - MaskV, "extractVec"); - SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); - } - if (Addr.getElementType() != SrcTy) { - Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); + if (!CGM.getCodeGenOpts().PreserveVec3Type) { + // Handle vectors differently to get better performance. + if (Ty->isVectorType()) { + llvm::Type *SrcTy = Value->getType(); + auto *VecTy = cast(SrcTy); + // Handle vec3 special. + if (VecTy->getNumElements() == 3) { + // Our source is a vec3, do a shuffle vector to make it a vec4. + llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), + Builder.getInt32(2), + llvm::UndefValue::get(Builder.getInt32Ty())}; + llvm::Value *MaskV = llvm::ConstantVector::get(Mask); + Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy), + MaskV, "extractVec"); + SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); + } + if (Addr.getElementType() != SrcTy) { + Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); + } } } diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index e9041b282f..8ee4d3f7be 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -3593,8 +3593,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // vector to get a vec4, then a bitcast if the target type is different. if (NumElementsSrc == 3 && NumElementsDst != 3) { Src = ConvertVec3AndVec4(Builder, CGF, Src, 4); - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - DstTy); + + if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + DstTy); + } + Src->setName("astype"); return Src; } @@ -3603,9 +3607,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // to vec4 if the original type is not vec4, then a shuffle vector to // get a vec3. if (NumElementsSrc != 3 && NumElementsDst == 3) { - auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - Vec4Ty); + if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { + auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + Vec4Ty); + } + Src = ConvertVec3AndVec4(Builder, CGF, Src, 3); Src->setName("astype"); return Src; diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp index 9138450181..271d1ca4f6 100644 --- a/lib/Frontend/CompilerInvocation.cpp +++ b/lib/Frontend/CompilerInvocation.cpp @@ -729,6 +729,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, } } + Opts.PreserveVec3Type = Args.hasArg(OPT_fpreserve_vec3_type); Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions); Opts.XRayInstrumentFunctions = Args.hasArg(OPT_fxray_instrument); Opts.XRayInstructionThreshold = diff --git a/test/CodeGenOpenCL/preserve_vec3.cl b/test/CodeGenOpenCL/preserve_vec3.cl new file mode 100644 index 0000000000..6efbbb3d53 --- /dev/null +++ b/test/CodeGenOpenCL/preserve_vec3.cl @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -fpreserve-vec3-type | FileCheck %s + +typedef float float3 __attribute__((ext_vector_type(3))); +typedef float float4 __attribute__((ext_vector_type(4))); + +void kernel foo(global float3 *a, global float3 *b) { + // CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a + // CHECK: store <3 x float> %[[LOAD_A]], <3 x float> addrspace(1)* %b + *b = *a; +} + +void kernel float4_to_float3(global float3 *a, global float4 *b) { + // CHECK: %[[LOAD_A:.*]] = load <4 x float>, <4 x float> addrspace(1)* %b, align 16 + // CHECK: %[[ASTYPE:.*]] = shufflevector <4 x float> %[[LOAD_A]], <4 x float> undef, <3 x i32> + // CHECK: store <3 x float> %[[ASTYPE:.*]], <3 x float> addrspace(1)* %a, align 16 + *a = __builtin_astype(*b, float3); +} + +void kernel float3_to_float4(global float3 *a, global float4 *b) { + // CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a, align 16 + // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> undef, <4 x i32> + // CHECK: store <4 x float> %[[ASTYPE:.*]], <4 x float> addrspace(1)* %b, align 16 + *b = __builtin_astype(*a, float4); +}