From 84392d0edc7127f868d3c97484ffc9d789c317ff Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 30 Mar 2012 00:26:17 +0000 Subject: [PATCH] The UTF16 string referenced by a CFString should go into the __TEXT,__ustring section. A 'normal' string will go into the __TEXT,__const section, but this isn't good for UTF16 strings. The __ustring section allows for coalescing, among other niceties (such as allowing the linker to easily split up strings). Instead of outputting the UTF16 string as a series of bytes, output it as a series of shorts. The back-end will then nicely place the UTF16 string into the correct section, because it's a mensch. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@153710 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CodeGenModule.cpp | 50 +++++++++++++-------------- test/CodeGen/darwin-string-literals.c | 7 ++-- test/CodeGen/utf16-cfstrings.c | 10 ++++++ test/CodeGenObjC/2009-08-05-utf16.m | 2 +- 4 files changed, 40 insertions(+), 29 deletions(-) create mode 100644 test/CodeGen/utf16-cfstrings.c diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 6fa33b6c12..c9f1066032 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -1903,8 +1903,10 @@ GetConstantCFStringEntry(llvm::StringMap &Map, return Map.GetOrCreateValue(String); } - // Otherwise, convert the UTF8 literals into a byte string. - SmallVector ToBuf(NumBytes); + // Otherwise, convert the UTF8 literals into a string of shorts. + IsUTF16 = true; + + SmallVector ToBuf(NumBytes + 1); // +1 for ending nulls. const UTF8 *FromPtr = (UTF8 *)String.data(); UTF16 *ToPtr = &ToBuf[0]; @@ -1915,28 +1917,11 @@ GetConstantCFStringEntry(llvm::StringMap &Map, // ConvertUTF8toUTF16 returns the length in ToPtr. StringLength = ToPtr - &ToBuf[0]; - // Render the UTF-16 string into a byte array and convert to the target byte - // order. - // - // FIXME: This isn't something we should need to do here. - SmallString<128> AsBytes; - AsBytes.reserve(StringLength * 2); - for (unsigned i = 0; i != StringLength; ++i) { - unsigned short Val = ToBuf[i]; - if (TargetIsLSB) { - AsBytes.push_back(Val & 0xFF); - AsBytes.push_back(Val >> 8); - } else { - AsBytes.push_back(Val >> 8); - AsBytes.push_back(Val & 0xFF); - } - } - // Append one extra null character, the second is automatically added by our - // caller. - AsBytes.push_back(0); - - IsUTF16 = true; - return Map.GetOrCreateValue(StringRef(AsBytes.data(), AsBytes.size())); + // Add an explicit null. + *ToPtr = 0; + return Map. + GetOrCreateValue(StringRef(reinterpret_cast(ToBuf.data()), + (StringLength + 1) * 2)); } static llvm::StringMapEntry & @@ -1990,8 +1975,15 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm::ConstantInt::get(Ty, 0x07C8); // String pointer. - llvm::Constant *C = llvm::ConstantDataArray::getString(VMContext, - Entry.getKey()); + llvm::Constant *C = 0; + if (isUTF16) { + ArrayRef Arr = + llvm::makeArrayRef((uint16_t*)Entry.getKey().data(), + Entry.getKey().size() / 2); + C = llvm::ConstantDataArray::get(VMContext, Arr); + } else { + C = llvm::ConstantDataArray::getString(VMContext, Entry.getKey()); + } llvm::GlobalValue::LinkageTypes Linkage; if (isUTF16) @@ -2016,8 +2008,14 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy); GV->setAlignment(Align.getQuantity()); } + + // String. Fields[2] = llvm::ConstantExpr::getGetElementPtr(GV, Zeros); + if (isUTF16) + // Cast the UTF16 string to the correct type. + Fields[2] = llvm::ConstantExpr::getBitCast(Fields[2], Int8PtrTy); + // String length. Ty = getTypes().ConvertType(getContext().LongTy); Fields[3] = llvm::ConstantInt::get(Ty, StringLength); diff --git a/test/CodeGen/darwin-string-literals.c b/test/CodeGen/darwin-string-literals.c index 6f9e0d2a63..968386a90e 100644 --- a/test/CodeGen/darwin-string-literals.c +++ b/test/CodeGen/darwin-string-literals.c @@ -2,13 +2,16 @@ // CHECK-LSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00" // CHECK-LSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00" -// CHECK-LSB: @.str2 = internal unnamed_addr constant [36 x i8] c"h\00e\00l\00l\00o\00 \00\92! \00\03& \00\90! \00w\00o\00r\00l\00d\00\00\00", align 2 +// CHECK-LSB: @.str2 = internal unnamed_addr constant [18 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 32, i16 8594, i16 32, i16 9731, i16 32, i16 8592, i16 32, i16 119, i16 111, i16 114, i16 108, i16 100, i16 0], align 2 +// CHECK-LSB: @.str4 = internal unnamed_addr constant [6 x i16] [i16 116, i16 101, i16 115, i16 116, i16 8482, i16 0], align 2 + // RUN: %clang_cc1 -triple powerpc-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix MSB %s // CHECK-MSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00" // CHECK-MSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00" -// CHECK-MSB: @.str2 = internal unnamed_addr constant [36 x i8] c"\00h\00e\00l\00l\00o\00 !\92\00 &\03\00 !\90\00 \00w\00o\00r\00l\00d\00\00", align 2 +// CHECK-MSB: @.str2 = internal unnamed_addr constant [18 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 32, i16 8594, i16 32, i16 9731, i16 32, i16 8592, i16 32, i16 119, i16 111, i16 114, i16 108, i16 100, i16 0], align 2 +// CHECK-MSB: @.str4 = internal unnamed_addr constant [6 x i16] [i16 116, i16 101, i16 115, i16 116, i16 8482, i16 0], align 2 const char *g0 = "string0"; const void *g1 = __builtin___CFStringMakeConstantString("string1"); diff --git a/test/CodeGen/utf16-cfstrings.c b/test/CodeGen/utf16-cfstrings.c new file mode 100644 index 0000000000..d4f214b0d6 --- /dev/null +++ b/test/CodeGen/utf16-cfstrings.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s +// + +// CHECK: @.str = internal unnamed_addr constant [9 x i16] [i16 252, i16 98, i16 101, i16 114, i16 104, i16 117, i16 110, i16 100, i16 0], align 2 + +#define CFSTR __builtin___CFStringMakeConstantString + +void foo() { + CFSTR("überhund"); +} diff --git a/test/CodeGenObjC/2009-08-05-utf16.m b/test/CodeGenObjC/2009-08-05-utf16.m index 38c9c82e78..06458e730a 100644 --- a/test/CodeGenObjC/2009-08-05-utf16.m +++ b/test/CodeGenObjC/2009-08-05-utf16.m @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -emit-llvm -w -x objective-c %s -o - | FileCheck %s // rdar://7095855 rdar://7115749 -// CHECK: internal unnamed_addr constant [12 x i8] +// CHECK: internal unnamed_addr constant [6 x i16] [i16 105, i16 80, i16 111, i16 100, i16 8482, i16 0], align 2 void *P = @"iPod™"; -- 2.40.0