From: Steve Naroff Date: Tue, 31 Mar 2009 16:53:37 +0000 (+0000) Subject: Some "prep" work for handling ObjC @-string constants that contain UTF-8. No function... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=33fdb738a6c125f4c788733897021b7c1a062b0c;p=clang Some "prep" work for handling ObjC @-string constants that contain UTF-8. No functionality change. Changed GenerateConstantString() to take an ObjCStringLiteral (instead of a std::string). While this isn't strictly necessary, it seems cleaner and allows us to cache the "containsNonAscii" result if necessary (to avoid checking in both Sema and CodeGen). git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@68114 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h index 2271a4e6bb..832f83ca73 100644 --- a/include/clang/AST/Expr.h +++ b/include/clang/AST/Expr.h @@ -533,7 +533,12 @@ public: const char *getStrData() const { return StrData; } unsigned getByteLength() const { return ByteLength; } bool isWide() const { return IsWide; } - + bool containsNonAscii() const { + for (unsigned i = 0; i < getByteLength(); ++i) + if (!isascii(getStrData()[i])) + return true; + return false; + } /// getNumConcatenated - Get the number of string literal tokens that were /// concatenated in translation phase #6 to form this string literal. 
unsigned getNumConcatenated() const { return NumConcatenated; } diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index 3f52175745..89b331950c 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -417,9 +417,7 @@ public: return CGM.GetAddrOfConstantStringFromObjCEncode(cast(E)); case Expr::ObjCStringLiteralClass: { ObjCStringLiteral* SL = cast(E); - std::string S(SL->getString()->getStrData(), - SL->getString()->getByteLength()); - llvm::Constant *C = CGM.getObjCRuntime().GenerateConstantString(S); + llvm::Constant *C = CGM.getObjCRuntime().GenerateConstantString(SL); return llvm::ConstantExpr::getBitCast(C, ConvertType(E->getType())); } case Expr::PredefinedExprClass: { @@ -445,6 +443,7 @@ public: const Expr *Arg = CE->getArg(0)->IgnoreParenCasts(); const StringLiteral *Literal = cast(Arg); std::string S(Literal->getStrData(), Literal->getByteLength()); + // FIXME: need to deal with UCN conversion issues. return CGM.GetAddrOfConstantCFString(S); } case Expr::BlockExprClass: { diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 2467a8290a..4db29ce05d 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -26,9 +26,7 @@ using namespace CodeGen; /// Emits an instance of NSConstantString representing the object. llvm::Value *CodeGenFunction::EmitObjCStringLiteral(const ObjCStringLiteral *E) { - std::string String(E->getString()->getStrData(), - E->getString()->getByteLength()); - llvm::Constant *C = CGM.getObjCRuntime().GenerateConstantString(String); + llvm::Constant *C = CGM.getObjCRuntime().GenerateConstantString(E); // FIXME: This bitcast should just be made an invariant on the Runtime. 
return llvm::ConstantExpr::getBitCast(C, ConvertType(E->getType())); } diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index e6ca536c66..bc713bd8cc 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -94,7 +94,7 @@ private: std::vector &V, const std::string &Name=""); public: CGObjCGNU(CodeGen::CodeGenModule &cgm); - virtual llvm::Constant *GenerateConstantString(const std::string &String); + virtual llvm::Constant *GenerateConstantString(const ObjCStringLiteral *); virtual CodeGen::RValue GenerateMessageSend(CodeGen::CodeGenFunction &CGF, QualType ResultType, @@ -252,7 +252,9 @@ llvm::Constant *CGObjCGNU::MakeGlobal(const llvm::ArrayType *Ty, //TODO: In case there are any crazy people still using the GNU runtime without //an OpenStep implementation, this should let them select their own class for //constant strings. -llvm::Constant *CGObjCGNU::GenerateConstantString(const std::string &Str) { +llvm::Constant *CGObjCGNU::GenerateConstantString(const ObjCStringLiteral *SL) { + std::string Str(SL->getString()->getStrData(), + SL->getString()->getByteLength()); std::vector Ivars; Ivars.push_back(NULLPtr); Ivars.push_back(MakeConstantString(Str)); diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index 05d81eaf0f..f688029fb2 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -487,7 +487,7 @@ public: CGObjCCommonMac(CodeGen::CodeGenModule &cgm) : CGM(cgm) { } - virtual llvm::Constant *GenerateConstantString(const std::string &String); + virtual llvm::Constant *GenerateConstantString(const ObjCStringLiteral *SL); virtual llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD=0); @@ -899,8 +899,13 @@ llvm::Value *CGObjCMac::GetSelector(CGBuilderTy &Builder, Selector Sel) { */ llvm::Constant *CGObjCCommonMac::GenerateConstantString( - const std::string &String) { - return CGM.GetAddrOfConstantCFString(String); + const ObjCStringLiteral *SL) { + std::string 
Str(SL->getString()->getStrData(), + SL->getString()->getByteLength()); + if (SL->getString()->containsNonAscii()) { + // FIXME: Convert from UTF-8 to UTF-16. + } + return CGM.GetAddrOfConstantCFString(Str); } /// Generates a message send where the super is the receiver. This is diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h index 7c66ff6f7b..65bf52c5e2 100644 --- a/lib/CodeGen/CGObjCRuntime.h +++ b/lib/CodeGen/CGObjCRuntime.h @@ -48,6 +48,7 @@ namespace CodeGen { class ObjCProtocolDecl; class Selector; class ObjCIvarDecl; + class ObjCStringLiteral; namespace CodeGen { class CodeGenModule; @@ -72,7 +73,7 @@ public: Selector Sel) = 0; /// Generate a constant string object. - virtual llvm::Constant *GenerateConstantString(const std::string &String) = 0; + virtual llvm::Constant *GenerateConstantString(const ObjCStringLiteral *) = 0; /// Generate a category. A category contains a list of methods (and /// accompanying metadata) and a list of protocols.