Fixed crasher in <rdar://problem/6780904> [irgen] Assertion failed: (Result == conversionOK && "UTF-8 to UTF-16 conversion failed") in GetAddrOfConstantCFString

author Steve Naroff <snaroff@apple.com>

Mon, 13 Apr 2009 19:08:08 +0000 (19:08 +0000)

committer Steve Naroff <snaroff@apple.com>

Mon, 13 Apr 2009 19:08:08 +0000 (19:08 +0000)
author Steve Naroff <snaroff@apple.com>
Mon, 13 Apr 2009 19:08:08 +0000 (19:08 +0000)
committer Steve Naroff <snaroff@apple.com>
Mon, 13 Apr 2009 19:08:08 +0000 (19:08 +0000)
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp

index ce893dbcfd6be88b20e8b45869d4ba340037e852..797c0cef0005858b5939cf5808ce94e8bf8927d9 100644 (file)
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -1060,16 +1060,25 @@ GetAddrOfConstantCFString(const StringLiteral *Literal) {
      Result = ConvertUTF8toUTF16(&FromPtr, FromPtr+Literal->getByteLength(),
                                  &ToPtr, ToPtr+Literal->getByteLength(),
                                  strictConversion);
-    assert(Result == conversionOK && "UTF-8 to UTF-16 conversion failed");
+    if (Result == conversionOK) {
+      // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings
+      // without doing more surgery to this routine. Since we aren't explicitly
+      // checking for endianness here, it's also a bug (when generating code for
+      // a target that doesn't match the host endianness). Modeling this as an
+      // i16 array is likely the cleanest solution.
+      StringLength = ToPtr-&ToBuf[0];
+      str.assign((char *)&ToBuf[0], StringLength*2);// Twice as many UTF8 chars.
+      isUTF16 = true;
+    } else if (Result == sourceIllegal) {
+      // FIXME: GCC currently emits the following warning (in the backend):
+      // "warning: input conversion stopped due to an input byte that does not 
+      //           belong to the input codeset UTF-8"
+      // The clang backend doesn't currently emit any warnings.
+      str.assign(Literal->getStrData(), Literal->getByteLength());
+      StringLength = str.length();
+    } else
+      assert(Result == conversionOK && "UTF-8 to UTF-16 conversion failed");
      
-    // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings
-    // without doing more surgery to this routine. Since we aren't explicitly
-    // checking for endianness here, it's also a bug (when generating code for
-    // a target that doesn't match the host endianness). Modeling this as an i16
-    // array is likely the cleanest solution.
-    StringLength = ToPtr-&ToBuf[0];
-    str.assign((char *)&ToBuf[0], StringLength*2); // Twice as many UTF8 chars.
-    isUTF16 = true;
    } else {
      str.assign(Literal->getStrData(), Literal->getByteLength());
      StringLength = str.length();
diff --git a/test/CodeGen/illegal-UTF8.m b/test/CodeGen/illegal-UTF8.m

new file mode 100644 (file)

index 0000000..88467b6
--- /dev/null
+++ b/test/CodeGen/illegal-UTF8.m
@@ -0,0 +1,8 @@
+// RUN: clang %s -S -m64
+
+@class NSString;
+
+// FIXME: GCC emits the following warning:
+// CodeGen/illegal-UTF8.m:4: warning: input conversion stopped due to an input byte that does not belong to the input codeset UTF-8
+
+NSString *S = @"\xff\xff___WAIT___";
author	Steve Naroff <snaroff@apple.com>
	Mon, 13 Apr 2009 19:08:08 +0000 (19:08 +0000)
committer	Steve Naroff <snaroff@apple.com>
	Mon, 13 Apr 2009 19:08:08 +0000 (19:08 +0000)
lib/CodeGen/CodeGenModule.cpp		patch \| blob \| history
test/CodeGen/illegal-UTF8.m	[new file with mode: 0644]	patch \| blob