From 941e47cef26fb6300a8d3e366f7c5694277e5849 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 3 Jul 2012 02:24:52 +0000
Subject: [PATCH] Share ConvertUTF8toWide() between Lex and CodeGen.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159634 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/ConvertUTF.h | 19 +++++++++++
 lib/Basic/CMakeLists.txt         |  1 +
 lib/Basic/ConvertUTFWrapper.cpp  | 54 ++++++++++++++++++++++++++++++++
 lib/CodeGen/CGExpr.cpp           | 39 ++---------------------
 lib/Lex/LiteralSupport.cpp       | 37 +---------------------
 5 files changed, 77 insertions(+), 73 deletions(-)
 create mode 100644 lib/Basic/ConvertUTFWrapper.cpp
diff --git a/include/clang/Basic/ConvertUTF.h b/include/clang/Basic/ConvertUTF.h
index 7fb5874027..53d451485d 100644
--- a/include/clang/Basic/ConvertUTF.h
+++ b/include/clang/Basic/ConvertUTF.h
@@ -159,6 +159,25 @@ Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd);
 
 #ifdef __cplusplus
 }
+
+/*************************************************************************/
+/* Below are LLVM-specific wrappers of the functions above. */
+
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+
+/**
+ * Convert an UTF8 StringRef to UTF8, UTF16, or UTF32 depending on
+ * WideCharWidth. The converted data is written to ResultPtr, which needs to
+ * point to at least WideCharWidth * (Source.Size() + 1) bytes. On success,
+ * ResultPtr will point one after the end of the copied string.
+ * \return true on success.
+ */
+bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
+                       char *&ResultPtr);
+
+}
 #endif
 
 #endif
diff --git a/lib/Basic/CMakeLists.txt b/lib/Basic/CMakeLists.txt
index 0169539736..611ae14438 100644
--- a/lib/Basic/CMakeLists.txt
+++ b/lib/Basic/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS mc)
 add_clang_library(clangBasic
   Builtins.cpp
   ConvertUTF.c
+  ConvertUTFWrapper.cpp
   Diagnostic.cpp
   DiagnosticIDs.cpp
   FileManager.cpp
diff --git a/lib/Basic/ConvertUTFWrapper.cpp b/lib/Basic/ConvertUTFWrapper.cpp
new file mode 100644
index 0000000000..42b4f58079
--- /dev/null
+++ b/lib/Basic/ConvertUTFWrapper.cpp
@@ -0,0 +1,54 @@
+//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/ConvertUTF.h"
+#include "clang/Basic/LLVM.h"
+
+namespace clang {
+
+bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
+                       char *&ResultPtr) {
+  assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
+  ConversionResult result = conversionOK;
+  // Copy the character span over.
+  if (WideCharWidth == 1) {
+    if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Source.begin()),
+                           reinterpret_cast<const UTF8*>(Source.end())))
+      result = sourceIllegal;
+    memcpy(ResultPtr, Source.data(), Source.size());
+    ResultPtr += Source.size();
+  } else if (WideCharWidth == 2) {
+    const UTF8 *sourceStart = (const UTF8*)Source.data();
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
+    ConversionFlags flags = strictConversion;
+    result = ConvertUTF8toUTF16(
+        &sourceStart, sourceStart + Source.size(),
+        &targetStart, targetStart + 2*Source.size(), flags);
+    if (result == conversionOK)
+      ResultPtr = reinterpret_cast<char*>(targetStart);
+  } else if (WideCharWidth == 4) {
+    const UTF8 *sourceStart = (const UTF8*)Source.data();
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
+    ConversionFlags flags = strictConversion;
+    result = ConvertUTF8toUTF32(
+        &sourceStart, sourceStart + Source.size(),
+        &targetStart, targetStart + 4*Source.size(), flags);
+    if (result == conversionOK)
+      ResultPtr = reinterpret_cast<char*>(targetStart);
+  }
+  assert((result != targetExhausted)
+         && "ConvertUTF8toUTFXX exhausted target buffer");
+  return result == conversionOK;
+}
+
+}
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 5f708d7a73..7fbe0d02b5 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -1732,47 +1732,12 @@ GetAddrOfConstantWideString(StringRef Str,
   return GV;
 }
 
-// FIXME: Mostly copied from StringLiteralParser::CopyStringFragment
 static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source,
                                     SmallString<32>& Target) {
   Target.resize(CharByteWidth * (Source.size() + 1));
   char* ResultPtr = &Target[0];
-
-  assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4);
-  ConversionResult result = conversionOK;
-  // Copy the character span over.
-  if (CharByteWidth == 1) {
-    if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(&*Source.begin()),
-                           reinterpret_cast<const UTF8*>(&*Source.end())))
-      result = sourceIllegal;
-    memcpy(ResultPtr, Source.data(), Source.size());
-    ResultPtr += Source.size();
-  } else if (CharByteWidth == 2) {
-    UTF8 const *sourceStart = (UTF8 const *)Source.data();
-    // FIXME: Make the type of the result buffer correct instead of
-    // using reinterpret_cast.
-    UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
-    ConversionFlags flags = strictConversion;
-    result = ConvertUTF8toUTF16(
-      &sourceStart,sourceStart + Source.size(),
-        &targetStart,targetStart + 2*Source.size(),flags);
-    if (result==conversionOK)
-      ResultPtr = reinterpret_cast<char*>(targetStart);
-  } else if (CharByteWidth == 4) {
-    UTF8 const *sourceStart = (UTF8 const *)Source.data();
-    // FIXME: Make the type of the result buffer correct instead of
-    // using reinterpret_cast.
-    UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
-    ConversionFlags flags = strictConversion;
-    result = ConvertUTF8toUTF32(
-        &sourceStart,sourceStart + Source.size(),
-        &targetStart,targetStart + 4*Source.size(),flags);
-    if (result==conversionOK)
-      ResultPtr = reinterpret_cast<char*>(targetStart);
-  }
-  assert((result != targetExhausted)
-         && "ConvertUTF8toUTFXX exhausted target buffer");
-  assert(result == conversionOK);
+  bool success = ConvertUTF8toWide(CharByteWidth, Source, ResultPtr);
+  assert(success);
   Target.resize(ResultPtr - &Target[0]);
 }
 
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index b2290b3187..9e3c7786a7 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -1330,45 +1330,10 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
   }
 }
 
-
 /// copyStringFragment - This function copies from Start to End into ResultPtr.
 /// Performs widening for multi-byte characters.
 bool StringLiteralParser::CopyStringFragment(StringRef Fragment) {
-  assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4);
-  ConversionResult result = conversionOK;
-  // Copy the character span over.
-  if (CharByteWidth == 1) {
-    if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Fragment.begin()),
-                           reinterpret_cast<const UTF8*>(Fragment.end())))
-      result = sourceIllegal;
-    memcpy(ResultPtr, Fragment.data(), Fragment.size());
-    ResultPtr += Fragment.size();
-  } else if (CharByteWidth == 2) {
-    UTF8 const *sourceStart = (UTF8 const *)Fragment.data();
-    // FIXME: Make the type of the result buffer correct instead of
-    // using reinterpret_cast.
-    UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
-    ConversionFlags flags = strictConversion;
-    result = ConvertUTF8toUTF16(
-	    &sourceStart,sourceStart + Fragment.size(),
-        &targetStart,targetStart + 2*Fragment.size(),flags);
-    if (result==conversionOK)
-      ResultPtr = reinterpret_cast<char*>(targetStart);
-  } else if (CharByteWidth == 4) {
-    UTF8 const *sourceStart = (UTF8 const *)Fragment.data();
-    // FIXME: Make the type of the result buffer correct instead of
-    // using reinterpret_cast.
-    UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
-    ConversionFlags flags = strictConversion;
-    result = ConvertUTF8toUTF32(
-        &sourceStart,sourceStart + Fragment.size(),
-        &targetStart,targetStart + 4*Fragment.size(),flags);
-    if (result==conversionOK)
-      ResultPtr = reinterpret_cast<char*>(targetStart);
-  }
-  assert((result != targetExhausted)
-         && "ConvertUTF8toUTFXX exhausted target buffer");
-  return result != conversionOK;
+  return !ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr);
 }
 
 bool StringLiteralParser::DiagnoseBadString(const Token &Tok) {
-- 
2.40.0