From: Justin Lebar Date: Fri, 30 Sep 2016 00:38:45 +0000 (+0000) Subject: Move UTF functions into namespace llvm. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=dd052623fdb8f7292a60486e46c498a4294ad581;p=clang Move UTF functions into namespace llvm. Summary: This lets people link against LLVM and their own version of the UTF library. I determined this only affects llvm, clang, lld, and lldb by running $ git grep -wl 'UTF[0-9]\+\|\bConvertUTF\bisLegalUTF\|getNumBytesFor' | cut -f 1 -d '/' | sort | uniq clang lld lldb llvm Tested with ninja lldb ninja check-clang check-llvm check-lld (ninja check-lldb doesn't complete for me with or without this patch.) Reviewers: rnk Subscribers: klimek, beanz, mgorny, llvm-commits Differential Revision: https://reviews.llvm.org/D24996 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@282822 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Analysis/FormatString.cpp b/lib/Analysis/FormatString.cpp index 0872e788c6..2a518cac39 100644 --- a/lib/Analysis/FormatString.cpp +++ b/lib/Analysis/FormatString.cpp @@ -266,14 +266,15 @@ bool clang::analyze_format_string::ParseUTF8InvalidSpecifier( if (SpecifierBegin + 1 >= FmtStrEnd) return false; - const UTF8 *SB = reinterpret_cast(SpecifierBegin + 1); - const UTF8 *SE = reinterpret_cast(FmtStrEnd); + const llvm::UTF8 *SB = + reinterpret_cast(SpecifierBegin + 1); + const llvm::UTF8 *SE = reinterpret_cast(FmtStrEnd); const char FirstByte = *SB; // If the invalid specifier is a multibyte UTF-8 string, return the // total length accordingly so that the conversion specifier can be // properly updated to reflect a complete UTF-8 specifier. - unsigned NumBytes = getNumBytesForUTF8(FirstByte); + unsigned NumBytes = llvm::getNumBytesForUTF8(FirstByte); if (NumBytes == 1) return false; if (SB + NumBytes > SE) diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 43fa615aa0..9bf3ae2752 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -3136,13 +3136,12 @@ GetConstantCFStringEntry(llvm::StringMap &Map, // Otherwise, convert the UTF8 literals into a string of shorts. IsUTF16 = true; - SmallVector ToBuf(NumBytes + 1); // +1 for ending nulls. - const UTF8 *FromPtr = (const UTF8 *)String.data(); - UTF16 *ToPtr = &ToBuf[0]; + SmallVector ToBuf(NumBytes + 1); // +1 for ending nulls. + const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data(); + llvm::UTF16 *ToPtr = &ToBuf[0]; - (void)ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, - &ToPtr, ToPtr + NumBytes, - strictConversion); + (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr, + ToPtr + NumBytes, llvm::strictConversion); // ConvertUTF8toUTF16 returns the length in ToPtr. StringLength = ToPtr - &ToBuf[0]; diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h index 148f7fd0e9..b2103cb412 100644 --- a/lib/Format/Encoding.h +++ b/lib/Format/Encoding.h @@ -33,16 +33,17 @@ enum Encoding { /// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8, /// it is considered UTF8, otherwise we treat it as some 8-bit encoding. inline Encoding detectEncoding(StringRef Text) { - const UTF8 *Ptr = reinterpret_cast(Text.begin()); - const UTF8 *BufEnd = reinterpret_cast(Text.end()); - if (::isLegalUTF8String(&Ptr, BufEnd)) + const llvm::UTF8 *Ptr = reinterpret_cast(Text.begin()); + const llvm::UTF8 *BufEnd = reinterpret_cast(Text.end()); + if (llvm::isLegalUTF8String(&Ptr, BufEnd)) return Encoding_UTF8; return Encoding_Unknown; } inline unsigned getCodePointCountUTF8(StringRef Text) { unsigned CodePoints = 0; - for (size_t i = 0, e = Text.size(); i < e; i += getNumBytesForUTF8(Text[i])) { + for (size_t i = 0, e = Text.size(); i < e; + i += llvm::getNumBytesForUTF8(Text[i])) { ++CodePoints; } return CodePoints; @@ -97,7 +98,7 @@ inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { switch (Encoding) { case Encoding_UTF8: - return getNumBytesForUTF8(FirstChar); + return llvm::getNumBytesForUTF8(FirstChar); default: return 1; } @@ -136,7 +137,7 @@ inline unsigned getEscapeSequenceLength(StringRef Text) { ++I; return I; } - return 1 + getNumBytesForUTF8(Text[1]); + return 1 + llvm::getNumBytesForUTF8(Text[1]); } } diff --git a/lib/Frontend/TextDiagnostic.cpp b/lib/Frontend/TextDiagnostic.cpp index 6fdafdb6be..a4937386b9 100644 --- a/lib/Frontend/TextDiagnostic.cpp +++ b/lib/Frontend/TextDiagnostic.cpp @@ -119,16 +119,17 @@ printableTextForNextCharacter(StringRef SourceLine, size_t *i, begin = reinterpret_cast(&*(SourceLine.begin() + *i)); end = begin + (SourceLine.size() - *i); - if (isLegalUTF8Sequence(begin, end)) { - UTF32 c; - UTF32 *cptr = &c; + if (llvm::isLegalUTF8Sequence(begin, end)) { + llvm::UTF32 c; + llvm::UTF32 *cptr = &c; unsigned char const *original_begin = begin; - unsigned char const *cp_end = begin+getNumBytesForUTF8(SourceLine[*i]); + unsigned char const *cp_end = + begin + llvm::getNumBytesForUTF8(SourceLine[*i]); - ConversionResult res = ConvertUTF8toUTF32(&begin, cp_end, &cptr, cptr+1, - strictConversion); + llvm::ConversionResult res = llvm::ConvertUTF8toUTF32( + &begin, cp_end, &cptr, cptr + 1, llvm::strictConversion); (void)res; - assert(conversionOK==res); + assert(llvm::conversionOK == res); assert(0 < begin-original_begin && "we must be further along in the string now"); *i += begin-original_begin; diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 985ce7bd0a..37c7aa4c57 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -1485,13 +1485,13 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { const char *UnicodePtr = CurPtr; - UTF32 CodePoint; - ConversionResult Result = - llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr, - (const UTF8 *)BufferEnd, + llvm::UTF32 CodePoint; + llvm::ConversionResult Result = + llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr, + (const llvm::UTF8 *)BufferEnd, &CodePoint, - strictConversion); - if (Result != conversionOK || + llvm::strictConversion); + if (Result != llvm::conversionOK || !isAllowedIDChar(static_cast(CodePoint), LangOpts)) return false; @@ -3625,17 +3625,17 @@ LexNextToken: break; } - UTF32 CodePoint; + llvm::UTF32 CodePoint; // We can't just reset CurPtr to BufferPtr because BufferPtr may point to // an escaped newline. --CurPtr; - ConversionResult Status = - llvm::convertUTF8Sequence((const UTF8 **)&CurPtr, - (const UTF8 *)BufferEnd, + llvm::ConversionResult Status = + llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, + (const llvm::UTF8 *)BufferEnd, &CodePoint, - strictConversion); - if (Status == conversionOK) { + llvm::strictConversion); + if (Status == llvm::conversionOK) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) return true; // KeepWhitespaceMode diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index 51a8f287d1..582ed3ff47 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -402,7 +402,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF32 *ResultPtr = reinterpret_cast(ResultBuf); + llvm::UTF32 *ResultPtr = reinterpret_cast(ResultBuf); *ResultPtr = UcnVal; ResultBuf += 4; return; @@ -411,7 +411,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, if (CharByteWidth == 2) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF16 *ResultPtr = reinterpret_cast(ResultBuf); + llvm::UTF16 *ResultPtr = reinterpret_cast(ResultBuf); if (UcnVal <= (UTF32)0xFFFF) { *ResultPtr = UcnVal; @@ -1114,11 +1114,11 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, char const *tmp_in_start = start; uint32_t *tmp_out_start = buffer_begin; - ConversionResult res = - ConvertUTF8toUTF32(reinterpret_cast(&start), - reinterpret_cast(begin), - &buffer_begin, buffer_end, strictConversion); - if (res != conversionOK) { + llvm::ConversionResult res = + llvm::ConvertUTF8toUTF32(reinterpret_cast(&start), + reinterpret_cast(begin), + &buffer_begin, buffer_end, llvm::strictConversion); + if (res != llvm::conversionOK) { // If we see bad encoding for unprefixed character literals, warn and // simply copy the byte values, for compatibility with gcc and // older versions of clang. @@ -1510,13 +1510,13 @@ void StringLiteralParser::init(ArrayRef StringToks){ if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF32 *ResultWidePtr = reinterpret_cast(ResultPtr); + llvm::UTF32 *ResultWidePtr = reinterpret_cast(ResultPtr); *ResultWidePtr = ResultChar; ResultPtr += 4; } else if (CharByteWidth == 2) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF16 *ResultWidePtr = reinterpret_cast(ResultPtr); + llvm::UTF16 *ResultWidePtr = reinterpret_cast(ResultPtr); *ResultWidePtr = ResultChar & 0xFFFF; ResultPtr += 2; } else { @@ -1531,12 +1531,12 @@ void StringLiteralParser::init(ArrayRef StringToks){ if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF32 *ResultWidePtr = reinterpret_cast(ResultBuf.data()); + llvm::UTF32 *ResultWidePtr = reinterpret_cast(ResultBuf.data()); ResultWidePtr[0] = GetNumStringChars() - 1; } else if (CharByteWidth == 2) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF16 *ResultWidePtr = reinterpret_cast(ResultBuf.data()); + llvm::UTF16 *ResultWidePtr = reinterpret_cast(ResultBuf.data()); ResultWidePtr[0] = GetNumStringChars() - 1; } else { assert(CharByteWidth == 1 && "Unexpected char width"); @@ -1570,7 +1570,7 @@ void StringLiteralParser::init(ArrayRef StringToks){ static const char *resyncUTF8(const char *Err, const char *End) { if (Err == End) return End; - End = Err + std::min(getNumBytesForUTF8(*Err), End-Err); + End = Err + std::min(llvm::getNumBytesForUTF8(*Err), End-Err); while (++Err != End && (*Err & 0xC0) == 0x80) ; return Err; @@ -1582,7 +1582,7 @@ static const char *resyncUTF8(const char *Err, const char *End) { bool StringLiteralParser::CopyStringFragment(const Token &Tok, const char *TokBegin, StringRef Fragment) { - const UTF8 *ErrorPtrTmp; + const llvm::UTF8 *ErrorPtrTmp; if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp)) return false; diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 7ea34f9890..bb2bfd33be 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -3262,15 +3262,15 @@ bool Sema::CheckObjCString(Expr *Arg) { if (Literal->containsNonAsciiOrNull()) { StringRef String = Literal->getString(); unsigned NumBytes = String.size(); - SmallVector ToBuf(NumBytes); - const UTF8 *FromPtr = (const UTF8 *)String.data(); - UTF16 *ToPtr = &ToBuf[0]; - - ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, - &ToPtr, ToPtr + NumBytes, - strictConversion); + SmallVector ToBuf(NumBytes); + const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data(); + llvm::UTF16 *ToPtr = &ToBuf[0]; + + llvm::ConversionResult Result = + llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr, + ToPtr + NumBytes, llvm::strictConversion); // Check for conversion failure. - if (Result != conversionOK) + if (Result != llvm::conversionOK) Diag(Arg->getLocStart(), diag::warn_cfstring_truncated) << Arg->getSourceRange(); } @@ -4777,16 +4777,16 @@ CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex, // hex value. std::string CodePointStr; if (!llvm::sys::locale::isPrint(*csStart)) { - UTF32 CodePoint; - const UTF8 **B = reinterpret_cast(&csStart); - const UTF8 *E = - reinterpret_cast(csStart + csLen); - ConversionResult Result = - llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion); - - if (Result != conversionOK) { + llvm::UTF32 CodePoint; + const llvm::UTF8 **B = reinterpret_cast(&csStart); + const llvm::UTF8 *E = + reinterpret_cast(csStart + csLen); + llvm::ConversionResult Result = + llvm::convertUTF8Sequence(B, E, &CodePoint, llvm::strictConversion); + + if (Result != llvm::conversionOK) { unsigned char FirstChar = *csStart; - CodePoint = (UTF32)FirstChar; + CodePoint = (llvm::UTF32)FirstChar; } llvm::raw_string_ostream OS(CodePointStr); diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index f34263dba5..2ef2fc9881 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -3070,8 +3070,9 @@ static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source, SmallString<32> &Target) { Target.resize(CharByteWidth * (Source.size() + 1)); char *ResultPtr = &Target[0]; - const UTF8 *ErrorPtr; - bool success = ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr); + const llvm::UTF8 *ErrorPtr; + bool success = + llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr); (void)success; assert(success); Target.resize(ResultPtr - &Target[0]);