From: Nate Begeman Date: Sun, 18 Oct 2009 20:10:40 +0000 (+0000) Subject: Support IRgen of OpenCL vector initializers, ensuring the resulting IR is in a X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0533b3020ca39898751d4200feed776861dcd1ce;p=clang Support IRgen of OpenCL vector initializers, ensuring the resulting IR is in a form that LLVM code generators can turn into efficient code. For example, int4 a, b, c; a = (int4)(b.yzw, a.x) git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@84434 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index d986549555..c3598edd8b 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -181,48 +181,7 @@ public: Value *VisitPredefinedExpr(Expr *E) { return EmitLValue(E).getAddress(); } - Value *VisitInitListExpr(InitListExpr *E) { - bool Ignore = TestAndClearIgnoreResultAssign(); - (void)Ignore; - assert (Ignore == false && "init list ignored"); - unsigned NumInitElements = E->getNumInits(); - - if (E->hadArrayRangeDesignator()) { - CGF.ErrorUnsupported(E, "GNU array range designator extension"); - } - - const llvm::VectorType *VType = - dyn_cast<llvm::VectorType>(ConvertType(E->getType())); - - // We have a scalar in braces. Just use the first element. - if (!VType) - return Visit(E->getInit(0)); - - unsigned NumVectorElements = VType->getNumElements(); - const llvm::Type *ElementType = VType->getElementType(); - - // Emit individual vector element stores. 
- llvm::Value *V = llvm::UndefValue::get(VType); - - // Emit initializers - unsigned i; - for (i = 0; i < NumInitElements; ++i) { - Value *NewV = Visit(E->getInit(i)); - Value *Idx = - llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGF.getLLVMContext()), i); - V = Builder.CreateInsertElement(V, NewV, Idx); - } - - // Emit remaining default initializers - for (/* Do not initialize i*/; i < NumVectorElements; ++i) { - Value *Idx = - llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGF.getLLVMContext()), i); - llvm::Value *NewV = llvm::Constant::getNullValue(ElementType); - V = Builder.CreateInsertElement(V, NewV, Idx); - } - - return V; - } + Value *VisitInitListExpr(InitListExpr *E); Value *VisitImplicitValueInitExpr(const ImplicitValueInitExpr *E) { return llvm::Constant::getNullValue(ConvertType(E->getType())); @@ -616,6 +575,174 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { return Builder.CreateExtractElement(Base, Idx, "vecext"); } +static llvm::Constant *getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx, + unsigned Off, const llvm::Type *I32Ty) { + int MV = SVI->getMaskValue(Idx); + if (MV == -1) + return llvm::UndefValue::get(I32Ty); + return llvm::ConstantInt::get(I32Ty, Off+MV); +} + +Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { + bool Ignore = TestAndClearIgnoreResultAssign(); + (void)Ignore; + assert (Ignore == false && "init list ignored"); + unsigned NumInitElements = E->getNumInits(); + + if (E->hadArrayRangeDesignator()) + CGF.ErrorUnsupported(E, "GNU array range designator extension"); + + const llvm::VectorType *VType = + dyn_cast<llvm::VectorType>(ConvertType(E->getType())); + + // We have a scalar in braces. Just use the first element. 
+ if (!VType) + return Visit(E->getInit(0)); + + unsigned ResElts = VType->getNumElements(); + const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CGF.getLLVMContext()); + + // Loop over initializers collecting the Value for each, and remembering + // whether the source was swizzle (ExtVectorElementExpr). This will allow + // us to fold the shuffle for the swizzle into the shuffle for the vector + // initializer, since LLVM optimizers generally do not want to touch + // shuffles. + unsigned CurIdx = 0; + bool VIsUndefShuffle = false; + llvm::Value *V = llvm::UndefValue::get(VType); + for (unsigned i = 0; i != NumInitElements; ++i) { + Expr *IE = E->getInit(i); + Value *Init = Visit(IE); + llvm::SmallVector<llvm::Constant*, 16> Args; + + const llvm::VectorType *VVT = dyn_cast<llvm::VectorType>(Init->getType()); + + // Handle scalar elements. If the scalar initializer is actually one + // element of a different vector of the same width, use shuffle instead of + // extract+insert. + if (!VVT) { + if (isa<ExtVectorElementExpr>(IE)) { + llvm::ExtractElementInst *EI = cast<llvm::ExtractElementInst>(Init); + + if (EI->getVectorOperandType()->getNumElements() == ResElts) { + llvm::ConstantInt *C = cast<llvm::ConstantInt>(EI->getIndexOperand()); + Value *LHS = 0, *RHS = 0; + if (CurIdx == 0) { + // insert into undef -> shuffle (src, undef) + Args.push_back(C); + for (unsigned j = 1; j != ResElts; ++j) + Args.push_back(llvm::UndefValue::get(I32Ty)); + + LHS = EI->getVectorOperand(); + RHS = V; + VIsUndefShuffle = true; + } else if (VIsUndefShuffle) { + // insert into undefshuffle && size match -> shuffle (v, src) + llvm::ShuffleVectorInst *SVV = cast<llvm::ShuffleVectorInst>(V); + for (unsigned j = 0; j != CurIdx; ++j) + Args.push_back(getMaskElt(SVV, j, 0, I32Ty)); + Args.push_back(llvm::ConstantInt::get(I32Ty, + ResElts + C->getZExtValue())); + for (unsigned j = CurIdx + 1; j != ResElts; ++j) + Args.push_back(llvm::UndefValue::get(I32Ty)); + + LHS = cast<llvm::ShuffleVectorInst>(V)->getOperand(0); + RHS = EI->getVectorOperand(); + VIsUndefShuffle = false; + } + if (!Args.empty()) { + llvm::Constant *Mask = 
llvm::ConstantVector::get(&Args[0], ResElts); + V = Builder.CreateShuffleVector(LHS, RHS, Mask); + ++CurIdx; + continue; + } + } + } + Value *Idx = llvm::ConstantInt::get(I32Ty, CurIdx); + V = Builder.CreateInsertElement(V, Init, Idx, "vecinit"); + VIsUndefShuffle = false; + ++CurIdx; + continue; + } + + unsigned InitElts = VVT->getNumElements(); + + // If the initializer is an ExtVecEltExpr (a swizzle), and the swizzle's + // input is the same width as the vector being constructed, generate an + // optimized shuffle of the swizzle input into the result. + if (isa<ExtVectorElementExpr>(IE)) { + llvm::ShuffleVectorInst *SVI = cast<llvm::ShuffleVectorInst>(Init); + Value *SVOp = SVI->getOperand(0); + const llvm::VectorType *OpTy = cast<llvm::VectorType>(SVOp->getType()); + + if (OpTy->getNumElements() == ResElts) { + unsigned Offset = (CurIdx == 0) ? 0 : ResElts; + + for (unsigned j = 0; j != CurIdx; ++j) { + // If the current vector initializer is a shuffle with undef, merge + // this shuffle directly into it. + if (VIsUndefShuffle) { + Args.push_back(getMaskElt(cast<llvm::ShuffleVectorInst>(V), j, 0, + I32Ty)); + } else { + Args.push_back(llvm::ConstantInt::get(I32Ty, j)); + } + } + for (unsigned j = 0, je = InitElts; j != je; ++j) + Args.push_back(getMaskElt(SVI, j, Offset, I32Ty)); + for (unsigned j = CurIdx + InitElts; j != ResElts; ++j) + Args.push_back(llvm::UndefValue::get(I32Ty)); + + if (VIsUndefShuffle) + V = cast<llvm::ShuffleVectorInst>(V)->getOperand(0); + + Init = SVOp; + } + } + + // Extend init to result vector length, and then shuffle its contribution + // to the vector initializer into V. 
+ if (Args.empty()) { + for (unsigned j = 0; j != InitElts; ++j) + Args.push_back(llvm::ConstantInt::get(I32Ty, j)); + for (unsigned j = InitElts; j != ResElts; ++j) + Args.push_back(llvm::UndefValue::get(I32Ty)); + llvm::Constant *Mask = llvm::ConstantVector::get(&Args[0], ResElts); + Init = Builder.CreateShuffleVector(Init, llvm::UndefValue::get(VVT), + Mask, "vecext"); + + Args.clear(); + for (unsigned j = 0; j != CurIdx; ++j) + Args.push_back(llvm::ConstantInt::get(I32Ty, j)); + for (unsigned j = 0; j != InitElts; ++j) + Args.push_back(llvm::ConstantInt::get(I32Ty, j+ResElts)); + for (unsigned j = CurIdx + InitElts; j != ResElts; ++j) + Args.push_back(llvm::UndefValue::get(I32Ty)); + } + + // If V is undef, make sure it ends up on the RHS of the shuffle to aid + // merging subsequent shuffles into this one. + if (CurIdx == 0) + std::swap(V, Init); + llvm::Constant *Mask = llvm::ConstantVector::get(&Args[0], ResElts); + V = Builder.CreateShuffleVector(V, Init, Mask, "vecinit"); + VIsUndefShuffle = isa<llvm::UndefValue>(Init); + CurIdx += InitElts; + } + + // FIXME: evaluate codegen vs. shuffling against constant null vector. + // Emit remaining default initializers. + const llvm::Type *EltTy = VType->getElementType(); + + // Emit remaining default initializers + for (/* Do not initialize i*/; CurIdx < ResElts; ++CurIdx) { + Value *Idx = llvm::ConstantInt::get(I32Ty, CurIdx); + llvm::Value *Init = llvm::Constant::getNullValue(EltTy); + V = Builder.CreateInsertElement(V, Init, Idx, "vecinit"); + } + return V; +} + // VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts // have to handle a more broad range of conversions than explicit casts, as they // handle things like function to ptr-to-function decay etc.