From 941e47cef26fb6300a8d3e366f7c5694277e5849 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 3 Jul 2012 02:24:52 +0000 Subject: [PATCH] Share ConvertUTF8toWide() between Lex and CodeGen. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159634 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/ConvertUTF.h | 19 +++++++++++ lib/Basic/CMakeLists.txt | 1 + lib/Basic/ConvertUTFWrapper.cpp | 54 ++++++++++++++++++++++++++++++++ lib/CodeGen/CGExpr.cpp | 39 ++--------------------- lib/Lex/LiteralSupport.cpp | 37 +--------------------- 5 files changed, 77 insertions(+), 73 deletions(-) create mode 100644 lib/Basic/ConvertUTFWrapper.cpp diff --git a/include/clang/Basic/ConvertUTF.h b/include/clang/Basic/ConvertUTF.h index 7fb5874027..53d451485d 100644 --- a/include/clang/Basic/ConvertUTF.h +++ b/include/clang/Basic/ConvertUTF.h @@ -159,6 +159,25 @@ Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd); #ifdef __cplusplus } + +/*************************************************************************/ +/* Below are LLVM-specific wrappers of the functions above. */ + +#include "llvm/ADT/StringRef.h" + +namespace clang { + +/** + * Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on + * WideCharWidth. The converted data is written to ResultPtr, which needs to + * point to at least WideCharWidth * (Source.size() + 1) bytes. On success, + * ResultPtr will point one after the end of the copied string. + * \return true on success. 
+ */ +bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, + char *&ResultPtr); + +} #endif #endif diff --git a/lib/Basic/CMakeLists.txt b/lib/Basic/CMakeLists.txt index 0169539736..611ae14438 100644 --- a/lib/Basic/CMakeLists.txt +++ b/lib/Basic/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS mc) add_clang_library(clangBasic Builtins.cpp ConvertUTF.c + ConvertUTFWrapper.cpp Diagnostic.cpp DiagnosticIDs.cpp FileManager.cpp diff --git a/lib/Basic/ConvertUTFWrapper.cpp b/lib/Basic/ConvertUTFWrapper.cpp new file mode 100644 index 0000000000..42b4f58079 --- /dev/null +++ b/lib/Basic/ConvertUTFWrapper.cpp @@ -0,0 +1,54 @@ +//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----=== +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/ConvertUTF.h" +#include "clang/Basic/LLVM.h" + +namespace clang { + +bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, + char *&ResultPtr) { + assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4); + ConversionResult result = conversionOK; + // Copy the character span over. + if (WideCharWidth == 1) { + if (!isLegalUTF8String(reinterpret_cast(Source.begin()), + reinterpret_cast(Source.end()))) + result = sourceIllegal; + memcpy(ResultPtr, Source.data(), Source.size()); + ResultPtr += Source.size(); + } else if (WideCharWidth == 2) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. 
+ UTF16 *targetStart = reinterpret_cast(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF16( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + 2*Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast(targetStart); + } else if (WideCharWidth == 4) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF32 *targetStart = reinterpret_cast(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF32( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + 4*Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast(targetStart); + } + assert((result != targetExhausted) + && "ConvertUTF8toUTFXX exhausted target buffer"); + return result == conversionOK; +} + +} diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 5f708d7a73..7fbe0d02b5 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -1732,47 +1732,12 @@ GetAddrOfConstantWideString(StringRef Str, return GV; } -// FIXME: Mostly copied from StringLiteralParser::CopyStringFragment static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source, SmallString<32>& Target) { Target.resize(CharByteWidth * (Source.size() + 1)); char* ResultPtr = &Target[0]; - - assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4); - ConversionResult result = conversionOK; - // Copy the character span over. - if (CharByteWidth == 1) { - if (!isLegalUTF8String(reinterpret_cast(&*Source.begin()), - reinterpret_cast(&*Source.end()))) - result = sourceIllegal; - memcpy(ResultPtr, Source.data(), Source.size()); - ResultPtr += Source.size(); - } else if (CharByteWidth == 2) { - UTF8 const *sourceStart = (UTF8 const *)Source.data(); - // FIXME: Make the type of the result buffer correct instead of - // using reinterpret_cast. 
- UTF16 *targetStart = reinterpret_cast(ResultPtr); - ConversionFlags flags = strictConversion; - result = ConvertUTF8toUTF16( - &sourceStart,sourceStart + Source.size(), - &targetStart,targetStart + 2*Source.size(),flags); - if (result==conversionOK) - ResultPtr = reinterpret_cast(targetStart); - } else if (CharByteWidth == 4) { - UTF8 const *sourceStart = (UTF8 const *)Source.data(); - // FIXME: Make the type of the result buffer correct instead of - // using reinterpret_cast. - UTF32 *targetStart = reinterpret_cast(ResultPtr); - ConversionFlags flags = strictConversion; - result = ConvertUTF8toUTF32( - &sourceStart,sourceStart + Source.size(), - &targetStart,targetStart + 4*Source.size(),flags); - if (result==conversionOK) - ResultPtr = reinterpret_cast(targetStart); - } - assert((result != targetExhausted) - && "ConvertUTF8toUTFXX exhausted target buffer"); - assert(result == conversionOK); + bool success = ConvertUTF8toWide(CharByteWidth, Source, ResultPtr); + assert(success); Target.resize(ResultPtr - &Target[0]); } diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index b2290b3187..9e3c7786a7 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -1330,45 +1330,10 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ } } - /// copyStringFragment - This function copies from Start to End into ResultPtr. /// Performs widening for multi-byte characters. bool StringLiteralParser::CopyStringFragment(StringRef Fragment) { - assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4); - ConversionResult result = conversionOK; - // Copy the character span over. 
- if (CharByteWidth == 1) { - if (!isLegalUTF8String(reinterpret_cast(Fragment.begin()), - reinterpret_cast(Fragment.end()))) - result = sourceIllegal; - memcpy(ResultPtr, Fragment.data(), Fragment.size()); - ResultPtr += Fragment.size(); - } else if (CharByteWidth == 2) { - UTF8 const *sourceStart = (UTF8 const *)Fragment.data(); - // FIXME: Make the type of the result buffer correct instead of - // using reinterpret_cast. - UTF16 *targetStart = reinterpret_cast(ResultPtr); - ConversionFlags flags = strictConversion; - result = ConvertUTF8toUTF16( - &sourceStart,sourceStart + Fragment.size(), - &targetStart,targetStart + 2*Fragment.size(),flags); - if (result==conversionOK) - ResultPtr = reinterpret_cast(targetStart); - } else if (CharByteWidth == 4) { - UTF8 const *sourceStart = (UTF8 const *)Fragment.data(); - // FIXME: Make the type of the result buffer correct instead of - // using reinterpret_cast. - UTF32 *targetStart = reinterpret_cast(ResultPtr); - ConversionFlags flags = strictConversion; - result = ConvertUTF8toUTF32( - &sourceStart,sourceStart + Fragment.size(), - &targetStart,targetStart + 4*Fragment.size(),flags); - if (result==conversionOK) - ResultPtr = reinterpret_cast(targetStart); - } - assert((result != targetExhausted) - && "ConvertUTF8toUTFXX exhausted target buffer"); - return result != conversionOK; + return !ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr); } bool StringLiteralParser::DiagnoseBadString(const Token &Tok) { -- 2.40.0