From: Tanya Lattner Date: Thu, 16 Aug 2012 00:10:13 +0000 (+0000) Subject: Convert loads and stores of vec3 to vec4 to achieve better code generation. Add test... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c58dcdc8facb646d88675bb6fbcb5c787166c4be;p=clang Convert loads and stores of vec3 to vec4 to achieve better code generation. Add test case. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@162002 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index ecee7b4931..1fe4c18bad 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -938,6 +938,50 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, unsigned Alignment, QualType Ty, llvm::MDNode *TBAAInfo) { + + // For better performance, handle vector loads differently. + if (Ty->isVectorType()) { + llvm::Value *V; + const llvm::Type *EltTy = + cast(Addr->getType())->getElementType(); + + const llvm::VectorType *VTy = cast(EltTy); + + // Handle vectors of size 3, like size 4 for better performance. + if (VTy->getNumElements() == 3) { + + // Bitcast to vec4 type. + llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(), + 4); + llvm::PointerType *ptVec4Ty = + llvm::PointerType::get(vec4Ty, + (cast( + Addr->getType()))->getAddressSpace()); + llvm::Value *Cast = Builder.CreateBitCast(Addr, ptVec4Ty, + "castToVec4"); + // Now load value. + llvm::Value *LoadVal = Builder.CreateLoad(Cast, Volatile, "loadVec4"); + + // Shuffle vector to get vec3. + llvm::SmallVector Mask; + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(getLLVMContext()), + 0)); + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(getLLVMContext()), + 1)); + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(getLLVMContext()), + 2)); + + llvm::Value *MaskV = llvm::ConstantVector::get(Mask); + V = Builder.CreateShuffleVector(LoadVal, + llvm::UndefValue::get(vec4Ty), + MaskV, "extractVec"); + return EmitFromMemory(V, Ty); + } + } + llvm::LoadInst *Load = Builder.CreateLoad(Addr); if (Volatile) Load->setVolatile(true); @@ -984,6 +1028,42 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, QualType Ty, llvm::MDNode *TBAAInfo, bool isInit) { + + // Handle vectors differently to get better performance. + if (Ty->isVectorType()) { + llvm::Type *SrcTy = Value->getType(); + llvm::VectorType *VecTy = cast(SrcTy); + // Handle vec3 special. + if (VecTy->getNumElements() == 3) { + llvm::LLVMContext &VMContext = getLLVMContext(); + + // Our source is a vec3, do a shuffle vector to make it a vec4. + llvm::SmallVector Mask; + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(VMContext), + 0)); + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(VMContext), + 1)); + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(VMContext), + 2)); + Mask.push_back(llvm::UndefValue::get(llvm::Type::getInt32Ty(VMContext))); + + llvm::Value *MaskV = llvm::ConstantVector::get(Mask); + Value = Builder.CreateShuffleVector(Value, + llvm::UndefValue::get(VecTy), + MaskV, "extractVec"); + SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); + } + llvm::PointerType *DstPtr = cast(Addr->getType()); + if (DstPtr->getElementType() != SrcTy) { + llvm::Type *MemTy = + llvm::PointerType::get(SrcTy, DstPtr->getAddressSpace()); + Addr = Builder.CreateBitCast(Addr, MemTy, "storetmp"); + } + } + Value = EmitToMemory(Value, Ty); llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile); diff --git a/test/CodeGen/alignment.c b/test/CodeGen/alignment.c index 8882c91d03..5051a98db9 100644 --- a/test/CodeGen/alignment.c +++ b/test/CodeGen/alignment.c @@ -43,7 +43,8 @@ void test3(packedfloat3 *p) { *p = (packedfloat3) { 3.2f, 2.3f, 0.1f }; } // CHECK: @test3( -// CHECK: store <3 x float> {{.*}}, align 4 +// CHECK: bitcast <3 x float>* %.compoundliteral to <4 x float>* +// CHECK: store <4 x float> {{.*}}, align 4 // CHECK: ret void diff --git a/test/CodeGenOpenCL/vectorLoadStore.cl b/test/CodeGenOpenCL/vectorLoadStore.cl new file mode 100644 index 0000000000..ba0405d098 --- /dev/null +++ b/test/CodeGenOpenCL/vectorLoadStore.cl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -emit-llvm -O0 -o - | FileCheck %s + +typedef char char3 __attribute((ext_vector_type(3)));; + +// Check for optimized vec3 load/store which treats vec3 as vec4. +void foo(char3 *P, char3 *Q) { + *P = *Q; + // CHECK: %extractVec = shufflevector <4 x i8> %loadVec4, <4 x i8> undef, <3 x i32> +}