From: Steve Naroff Date: Mon, 13 Apr 2009 19:08:08 +0000 (+0000) Subject: Fixed crasher in [irgen] Assertion failed: (Result == conver... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=aa4a756185e77755aaa10ae50db08ae5be58e70a;p=clang Fixed crasher in [irgen] Assertion failed: (Result == conversionOK && "UTF-8 to UTF-16 conversion failed"), function GetAddrOfConstantCFString, file CodeGenModule.cpp, line 1063. Still a diagnostic related FIXME (will discuss with Daniel/Fariborz offline). git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@68975 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index ce893dbcfd..797c0cef00 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -1060,16 +1060,25 @@ GetAddrOfConstantCFString(const StringLiteral *Literal) { Result = ConvertUTF8toUTF16(&FromPtr, FromPtr+Literal->getByteLength(), &ToPtr, ToPtr+Literal->getByteLength(), strictConversion); - assert(Result == conversionOK && "UTF-8 to UTF-16 conversion failed"); + if (Result == conversionOK) { + // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings + // without doing more surgery to this routine. Since we aren't explicitly + // checking for endianness here, it's also a bug (when generating code for + // a target that doesn't match the host endianness). Modeling this as an + // i16 array is likely the cleanest solution. + StringLength = ToPtr-&ToBuf[0]; + str.assign((char *)&ToBuf[0], StringLength*2);// Twice as many UTF8 chars. + isUTF16 = true; + } else if (Result == sourceIllegal) { + // FIXME: GCC currently emits the following warning (in the backend): + // "warning: input conversion stopped due to an input byte that does not + // belong to the input codeset UTF-8" + // The clang backend doesn't currently emit any warnings. + str.assign(Literal->getStrData(), Literal->getByteLength()); + StringLength = str.length(); + } else + assert(Result == conversionOK && "UTF-8 to UTF-16 conversion failed"); - // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings - // without doing more surgery to this routine. Since we aren't explicitly - // checking for endianness here, it's also a bug (when generating code for - // a target that doesn't match the host endianness). Modeling this as an i16 - // array is likely the cleanest solution. - StringLength = ToPtr-&ToBuf[0]; - str.assign((char *)&ToBuf[0], StringLength*2); // Twice as many UTF8 chars. - isUTF16 = true; } else { str.assign(Literal->getStrData(), Literal->getByteLength()); StringLength = str.length(); diff --git a/test/CodeGen/illegal-UTF8.m b/test/CodeGen/illegal-UTF8.m new file mode 100644 index 0000000000..88467b6d41 --- /dev/null +++ b/test/CodeGen/illegal-UTF8.m @@ -0,0 +1,8 @@ +// RUN: clang %s -S -m64 + +@class NSString; + +// FIXME: GCC emits the following warning: +// CodeGen/illegal-UTF8.m:4: warning: input conversion stopped due to an input byte that does not belong to the input codeset UTF-8 + +NSString *S = @"\xff\xff___WAIT___";