From: Daniel Dunbar Date: Thu, 23 Jul 2009 22:52:48 +0000 (+0000) Subject: Factor out map lookup for CFString constants. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1d5529132e4620562cab931c1f84c24e42f02741;p=clang Factor out map lookup for CFString constants. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@76908 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index c417cf8ec9..994f60b015 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -1190,45 +1190,60 @@ static void appendFieldAndPadding(CodeGenModule &CGM, } } -llvm::Constant *CodeGenModule:: -GetAddrOfConstantCFString(const StringLiteral *Literal) { - std::string str; - unsigned StringLength = 0; - - bool isUTF16 = false; - if (Literal->containsNonAsciiOrNull()) { - // Convert from UTF-8 to UTF-16. - llvm::SmallVector<UTF16, 128> ToBuf(Literal->getByteLength()); - const UTF8 *FromPtr = (UTF8 *)Literal->getStrData(); - UTF16 *ToPtr = &ToBuf[0]; +static llvm::StringMapEntry<llvm::Constant*> & +GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map, + const StringLiteral *Literal, + bool &IsUTF16, + unsigned &StringLength) { + unsigned NumBytes = Literal->getByteLength(); + + // Check for simple case. + if (!Literal->containsNonAsciiOrNull()) { + StringLength = NumBytes; + return Map.GetOrCreateValue(llvm::StringRef(Literal->getStrData(), + StringLength)); + } + + // Otherwise, convert the UTF8 literals into a byte string. + llvm::SmallVector<UTF16, 128> ToBuf(NumBytes); + const UTF8 *FromPtr = (UTF8 *)Literal->getStrData(); + UTF16 *ToPtr = &ToBuf[0]; - ConversionResult Result; - Result = ConvertUTF8toUTF16(&FromPtr, FromPtr+Literal->getByteLength(), - &ToPtr, ToPtr+Literal->getByteLength(), - strictConversion); - if (Result == conversionOK) { - // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings - // without doing more surgery to this routine. 
Since we aren't explicitly - // checking for endianness here, it's also a bug (when generating code for - // a target that doesn't match the host endianness). Modeling this as an - // i16 array is likely the cleanest solution. - StringLength = ToPtr-&ToBuf[0]; - str.assign((char *)&ToBuf[0], StringLength*2);// Twice as many UTF8 chars. - isUTF16 = true; - } else { - assert(Result == sourceIllegal && "UTF-8 to UTF-16 conversion failed"); - // FIXME: Have Sema::CheckObjCString() validate the UTF-8 string. - StringLength = Literal->getByteLength(); - str.assign(Literal->getStrData(), StringLength); - } - } else { - StringLength = Literal->getByteLength(); - str.assign(Literal->getStrData(), StringLength); + ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, + &ToPtr, ToPtr + NumBytes, + strictConversion); + + // Check for conversion failure. + if (Result != conversionOK) { + // FIXME: Have Sema::CheckObjCString() validate the UTF-8 string and remove + // this duplicate code. + assert(Result == sourceIllegal && "UTF-8 to UTF-16 conversion failed"); + StringLength = NumBytes; + return Map.GetOrCreateValue(llvm::StringRef(Literal->getStrData(), + StringLength)); } - llvm::Constant *&Entry = CFConstantStringMap[str]; + + // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings + // without doing more surgery to this routine. Since we aren't explicitly + // checking for endianness here, it's also a bug (when generating code for + // a target that doesn't match the host endianness). Modeling this as an + // i16 array is likely the cleanest solution. 
+ StringLength = ToPtr - &ToBuf[0]; + IsUTF16 = true; + return Map.GetOrCreateValue(llvm::StringRef((char *)&ToBuf[0], + StringLength * 2)); +} + +llvm::Constant * +CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { + unsigned StringLength = 0; + bool isUTF16 = false; + llvm::StringMapEntry<llvm::Constant*> &Entry = + GetConstantCFStringEntry(CFConstantStringMap, Literal, isUTF16, + StringLength); - if (Entry) - return Entry; + if (llvm::Constant *C = Entry.getValue()) + return C; llvm::Constant *Zero = getLLVMContext().getNullValue(llvm::Type::Int32Ty); llvm::Constant *Zeros[] = { Zero, Zero }; @@ -1271,7 +1286,7 @@ GetAddrOfConstantCFString(const StringLiteral *Literal) { // String pointer. CurField = NextField; NextField = *Field++; - llvm::Constant *C = VMContext.getConstantArray(str); + llvm::Constant *C = VMContext.getConstantArray(Entry.getKey().str()); const char *Sect, *Prefix; bool isConstant; @@ -1318,7 +1333,7 @@ GetAddrOfConstantCFString(const StringLiteral *Literal) { "_unnamed_cfstring_"); if (const char *Sect = getContext().Target.getCFStringSection()) GV->setSection(Sect); - Entry = GV; + Entry.setValue(GV); return GV; }