Move UTF functions into namespace llvm.

author Justin Lebar <jlebar@google.com>

Fri, 30 Sep 2016 00:38:45 +0000 (00:38 +0000)

committer Justin Lebar <jlebar@google.com>

Fri, 30 Sep 2016 00:38:45 +0000 (00:38 +0000)
author Justin Lebar <jlebar@google.com>
Fri, 30 Sep 2016 00:38:45 +0000 (00:38 +0000)
committer Justin Lebar <jlebar@google.com>
Fri, 30 Sep 2016 00:38:45 +0000 (00:38 +0000)
diff --git a/lib/Analysis/FormatString.cpp b/lib/Analysis/FormatString.cpp

index 0872e788c60b72d82bbaa6e406aea47131c460d9..2a518cac3943169d0918c0c471603135178f9453 100644 (file)
--- a/lib/Analysis/FormatString.cpp
+++ b/lib/Analysis/FormatString.cpp
@@ -266,14 +266,15 @@ bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
    if (SpecifierBegin + 1 >= FmtStrEnd)
      return false;
  
-  const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
-  const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+  const llvm::UTF8 *SB =
+      reinterpret_cast<const llvm::UTF8 *>(SpecifierBegin + 1);
+  const llvm::UTF8 *SE = reinterpret_cast<const llvm::UTF8 *>(FmtStrEnd);
    const char FirstByte = *SB;
  
    // If the invalid specifier is a multibyte UTF-8 string, return the
    // total length accordingly so that the conversion specifier can be
    // properly updated to reflect a complete UTF-8 specifier.
-  unsigned NumBytes = getNumBytesForUTF8(FirstByte);
+  unsigned NumBytes = llvm::getNumBytesForUTF8(FirstByte);
    if (NumBytes == 1)
      return false;
    if (SB + NumBytes > SE)
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp

index 43fa615aa0f824a95028a317db240b1bf1e8c5cf..9bf3ae2752f99325315ef1a65a67ef970873ad1f 100644 (file)
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -3136,13 +3136,12 @@ GetConstantCFStringEntry(llvm::StringMap<llvm::GlobalVariable *> &Map,
    // Otherwise, convert the UTF8 literals into a string of shorts.
    IsUTF16 = true;
  
-  SmallVector<UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls.
-  const UTF8 *FromPtr = (const UTF8 *)String.data();
-  UTF16 *ToPtr = &ToBuf[0];
+  SmallVector<llvm::UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls.
+  const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data();
+  llvm::UTF16 *ToPtr = &ToBuf[0];
  
-  (void)ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
-                           &ToPtr, ToPtr + NumBytes,
-                           strictConversion);
+  (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr,
+                                 ToPtr + NumBytes, llvm::strictConversion);
  
    // ConvertUTF8toUTF16 returns the length in ToPtr.
    StringLength = ToPtr - &ToBuf[0];
diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h

index 148f7fd0e91ba5dc1ee0572ebd02b4ba5d34979b..b2103cb412eed8117382e7eb3802302b1029b327 100644 (file)
--- a/lib/Format/Encoding.h
+++ b/lib/Format/Encoding.h
@@ -33,16 +33,17 @@ enum Encoding {
  /// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8,
  /// it is considered UTF8, otherwise we treat it as some 8-bit encoding.
  inline Encoding detectEncoding(StringRef Text) {
-  const UTF8 *Ptr = reinterpret_cast<const UTF8 *>(Text.begin());
-  const UTF8 *BufEnd = reinterpret_cast<const UTF8 *>(Text.end());
-  if (::isLegalUTF8String(&Ptr, BufEnd))
+  const llvm::UTF8 *Ptr = reinterpret_cast<const llvm::UTF8 *>(Text.begin());
+  const llvm::UTF8 *BufEnd = reinterpret_cast<const llvm::UTF8 *>(Text.end());
+  if (llvm::isLegalUTF8String(&Ptr, BufEnd))
      return Encoding_UTF8;
    return Encoding_Unknown;
  }
  
  inline unsigned getCodePointCountUTF8(StringRef Text) {
    unsigned CodePoints = 0;
-  for (size_t i = 0, e = Text.size(); i < e; i += getNumBytesForUTF8(Text[i])) {
+  for (size_t i = 0, e = Text.size(); i < e;
+       i += llvm::getNumBytesForUTF8(Text[i])) {
      ++CodePoints;
    }
    return CodePoints;
@@ -97,7 +98,7 @@ inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
  inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
    switch (Encoding) {
    case Encoding_UTF8:
-    return getNumBytesForUTF8(FirstChar);
+    return llvm::getNumBytesForUTF8(FirstChar);
    default:
      return 1;
    }
@@ -136,7 +137,7 @@ inline unsigned getEscapeSequenceLength(StringRef Text) {
          ++I;
        return I;
      }
-    return 1 + getNumBytesForUTF8(Text[1]);
+    return 1 + llvm::getNumBytesForUTF8(Text[1]);
    }
  }
  
diff --git a/lib/Frontend/TextDiagnostic.cpp b/lib/Frontend/TextDiagnostic.cpp

index 6fdafdb6be0b1644cf3769ca6c16af19ee6f0b07..a4937386b93f0f2cbddd31211791ae1153260437 100644 (file)
--- a/lib/Frontend/TextDiagnostic.cpp
+++ b/lib/Frontend/TextDiagnostic.cpp
@@ -119,16 +119,17 @@ printableTextForNextCharacter(StringRef SourceLine, size_t *i,
    begin = reinterpret_cast<unsigned char const *>(&*(SourceLine.begin() + *i));
    end = begin + (SourceLine.size() - *i);
    
-  if (isLegalUTF8Sequence(begin, end)) {
-    UTF32 c;
-    UTF32 *cptr = &c;
+  if (llvm::isLegalUTF8Sequence(begin, end)) {
+    llvm::UTF32 c;
+    llvm::UTF32 *cptr = &c;
      unsigned char const *original_begin = begin;
-    unsigned char const *cp_end = begin+getNumBytesForUTF8(SourceLine[*i]);
+    unsigned char const *cp_end =
+        begin + llvm::getNumBytesForUTF8(SourceLine[*i]);
  
-    ConversionResult res = ConvertUTF8toUTF32(&begin, cp_end, &cptr, cptr+1,
-                                              strictConversion);
+    llvm::ConversionResult res = llvm::ConvertUTF8toUTF32(
+        &begin, cp_end, &cptr, cptr + 1, llvm::strictConversion);
      (void)res;
-    assert(conversionOK==res);
+    assert(llvm::conversionOK == res);
      assert(0 < begin-original_begin
             && "we must be further along in the string now");
      *i += begin-original_begin;
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp

index 985ce7bd0aee430311f4b2fa85ea6743edeb63d7..37c7aa4c577e47278ce69184b46e97cea6ce748d 100644 (file)
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1485,13 +1485,13 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
  
  bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
    const char *UnicodePtr = CurPtr;
-  UTF32 CodePoint;
-  ConversionResult Result =
-      llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
-                                (const UTF8 *)BufferEnd,
+  llvm::UTF32 CodePoint;
+  llvm::ConversionResult Result =
+      llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr,
+                                (const llvm::UTF8 *)BufferEnd,
                                  &CodePoint,
-                                strictConversion);
-  if (Result != conversionOK ||
+                                llvm::strictConversion);
+  if (Result != llvm::conversionOK ||
        !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
      return false;
  
@@ -3625,17 +3625,17 @@ LexNextToken:
        break;
      }
  
-    UTF32 CodePoint;
+    llvm::UTF32 CodePoint;
  
      // We can't just reset CurPtr to BufferPtr because BufferPtr may point to
      // an escaped newline.
      --CurPtr;
-    ConversionResult Status =
-        llvm::convertUTF8Sequence((const UTF8 **)&CurPtr,
-                                  (const UTF8 *)BufferEnd,
+    llvm::ConversionResult Status =
+        llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr,
+                                  (const llvm::UTF8 *)BufferEnd,
                                    &CodePoint,
-                                  strictConversion);
-    if (Status == conversionOK) {
+                                  llvm::strictConversion);
+    if (Status == llvm::conversionOK) {
        if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
          if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
            return true; // KeepWhitespaceMode
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp

index 51a8f287d1ed493e4c6536bc2fd584597c404dc0..582ed3ff4721603ae591bc46b776274aa5ee509d 100644 (file)
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -402,7 +402,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
    if (CharByteWidth == 4) {
      // FIXME: Make the type of the result buffer correct instead of
      // using reinterpret_cast.
-    UTF32 *ResultPtr = reinterpret_cast<UTF32*>(ResultBuf);
+    llvm::UTF32 *ResultPtr = reinterpret_cast<llvm::UTF32*>(ResultBuf);
      *ResultPtr = UcnVal;
      ResultBuf += 4;
      return;
@@ -411,7 +411,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
    if (CharByteWidth == 2) {
      // FIXME: Make the type of the result buffer correct instead of
      // using reinterpret_cast.
-    UTF16 *ResultPtr = reinterpret_cast<UTF16*>(ResultBuf);
+    llvm::UTF16 *ResultPtr = reinterpret_cast<llvm::UTF16*>(ResultBuf);
  
      if (UcnVal <= (UTF32)0xFFFF) {
        *ResultPtr = UcnVal;
@@ -1114,11 +1114,11 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
  
        char const *tmp_in_start = start;
        uint32_t *tmp_out_start = buffer_begin;
-      ConversionResult res =
-          ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start),
-                             reinterpret_cast<UTF8 const *>(begin),
-                             &buffer_begin, buffer_end, strictConversion);
-      if (res != conversionOK) {
+      llvm::ConversionResult res =
+          llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
+                             reinterpret_cast<llvm::UTF8 const *>(begin),
+                             &buffer_begin, buffer_end, llvm::strictConversion);
+      if (res != llvm::conversionOK) {
          // If we see bad encoding for unprefixed character literals, warn and
          // simply copy the byte values, for compatibility with gcc and
          // older versions of clang.
@@ -1510,13 +1510,13 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
          if (CharByteWidth == 4) {
            // FIXME: Make the type of the result buffer correct instead of
            // using reinterpret_cast.
-          UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr);
+          llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr);
            *ResultWidePtr = ResultChar;
            ResultPtr += 4;
          } else if (CharByteWidth == 2) {
            // FIXME: Make the type of the result buffer correct instead of
            // using reinterpret_cast.
-          UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr);
+          llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr);
            *ResultWidePtr = ResultChar & 0xFFFF;
            ResultPtr += 2;
          } else {
@@ -1531,12 +1531,12 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
      if (CharByteWidth == 4) {
        // FIXME: Make the type of the result buffer correct instead of
        // using reinterpret_cast.
-      UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultBuf.data());
+      llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data());
        ResultWidePtr[0] = GetNumStringChars() - 1;
      } else if (CharByteWidth == 2) {
        // FIXME: Make the type of the result buffer correct instead of
        // using reinterpret_cast.
-      UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultBuf.data());
+      llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data());
        ResultWidePtr[0] = GetNumStringChars() - 1;
      } else {
        assert(CharByteWidth == 1 && "Unexpected char width");
@@ -1570,7 +1570,7 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
  static const char *resyncUTF8(const char *Err, const char *End) {
    if (Err == End)
      return End;
-  End = Err + std::min<unsigned>(getNumBytesForUTF8(*Err), End-Err);
+  End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
    while (++Err != End && (*Err & 0xC0) == 0x80)
      ;
    return Err;
@@ -1582,7 +1582,7 @@ static const char *resyncUTF8(const char *Err, const char *End) {
  bool StringLiteralParser::CopyStringFragment(const Token &Tok,
                                               const char *TokBegin,
                                               StringRef Fragment) {
-  const UTF8 *ErrorPtrTmp;
+  const llvm::UTF8 *ErrorPtrTmp;
    if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
      return false;
  
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp

index 7ea34f989085f287a6f9cb94f263d5e17f82cda2..bb2bfd33be5999c023a18dcdec5cca61a2cb577d 100644 (file)
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -3262,15 +3262,15 @@ bool Sema::CheckObjCString(Expr *Arg) {
    if (Literal->containsNonAsciiOrNull()) {
      StringRef String = Literal->getString();
      unsigned NumBytes = String.size();
-    SmallVector<UTF16, 128> ToBuf(NumBytes);
-    const UTF8 *FromPtr = (const UTF8 *)String.data();
-    UTF16 *ToPtr = &ToBuf[0];
-    
-    ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
-                                                 &ToPtr, ToPtr + NumBytes,
-                                                 strictConversion);
+    SmallVector<llvm::UTF16, 128> ToBuf(NumBytes);
+    const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data();
+    llvm::UTF16 *ToPtr = &ToBuf[0];
+
+    llvm::ConversionResult Result =
+        llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr,
+                                 ToPtr + NumBytes, llvm::strictConversion);
      // Check for conversion failure.
-    if (Result != conversionOK)
+    if (Result != llvm::conversionOK)
        Diag(Arg->getLocStart(),
             diag::warn_cfstring_truncated) << Arg->getSourceRange();
    }
@@ -4777,16 +4777,16 @@ CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
    // hex value.
    std::string CodePointStr;
    if (!llvm::sys::locale::isPrint(*csStart)) {
-    UTF32 CodePoint;
-    const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart);
-    const UTF8 *E =
-        reinterpret_cast<const UTF8 *>(csStart + csLen);
-    ConversionResult Result =
-        llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion);
-
-    if (Result != conversionOK) {
+    llvm::UTF32 CodePoint;
+    const llvm::UTF8 **B = reinterpret_cast<const llvm::UTF8 **>(&csStart);
+    const llvm::UTF8 *E =
+        reinterpret_cast<const llvm::UTF8 *>(csStart + csLen);
+    llvm::ConversionResult Result =
+        llvm::convertUTF8Sequence(B, E, &CodePoint, llvm::strictConversion);
+
+    if (Result != llvm::conversionOK) {
        unsigned char FirstChar = *csStart;
-      CodePoint = (UTF32)FirstChar;
+      CodePoint = (llvm::UTF32)FirstChar;
      }
  
      llvm::raw_string_ostream OS(CodePointStr);
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp

index f34263dba51037190565baa05be25dd6aef277ba..2ef2fc9881613af0bb576142fd24f18fdb31aa8e 100644 (file)
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -3070,8 +3070,9 @@ static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source,
                                      SmallString<32> &Target) {
    Target.resize(CharByteWidth * (Source.size() + 1));
    char *ResultPtr = &Target[0];
-  const UTF8 *ErrorPtr;
-  bool success = ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr);
+  const llvm::UTF8 *ErrorPtr;
+  bool success =
+      llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr);
    (void)success;
    assert(success);
    Target.resize(ResultPtr - &Target[0]);
author	Justin Lebar <jlebar@google.com>
	Fri, 30 Sep 2016 00:38:45 +0000 (00:38 +0000)
committer	Justin Lebar <jlebar@google.com>
	Fri, 30 Sep 2016 00:38:45 +0000 (00:38 +0000)
lib/Analysis/FormatString.cpp		patch | blob | history
lib/CodeGen/CodeGenModule.cpp		patch | blob | history
lib/Format/Encoding.h		patch | blob | history
lib/Frontend/TextDiagnostic.cpp		patch | blob | history
lib/Lex/Lexer.cpp		patch | blob | history
lib/Lex/LiteralSupport.cpp		patch | blob | history
lib/Sema/SemaChecking.cpp		patch | blob | history
lib/Sema/SemaExpr.cpp		patch | blob | history