From: Dmitri Gribenko Date: Fri, 27 Jul 2012 20:36:22 +0000 (+0000) Subject: Add a function to convert a single Unicode code point to a UTF8 sequence. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e43031344b17cdb345957c4acc9d39d26f86efd3;p=clang Add a function to convert a single Unicode code point to a UTF8 sequence. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@160890 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/ConvertUTF.h b/include/clang/Basic/ConvertUTF.h index 53d451485d..e7cfa8a767 100644 --- a/include/clang/Basic/ConvertUTF.h +++ b/include/clang/Basic/ConvertUTF.h @@ -110,6 +110,8 @@ typedef unsigned char Boolean; /* 0 or 1 */ #define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF #define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF +#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4 + typedef enum { conversionOK, /* conversion successful */ sourceExhausted, /* partial character in source, but hit end */ @@ -139,11 +141,13 @@ ConversionResult ConvertUTF8toUTF32 ( ConversionResult ConvertUTF16toUTF8 ( const UTF16** sourceStart, const UTF16* sourceEnd, UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); +#endif ConversionResult ConvertUTF32toUTF8 ( const UTF32** sourceStart, const UTF32* sourceEnd, UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); +#ifdef CLANG_NEEDS_THESE_ONE_DAY ConversionResult ConvertUTF16toUTF32 ( const UTF16** sourceStart, const UTF16* sourceEnd, UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); @@ -177,6 +181,18 @@ namespace clang { bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, char *&ResultPtr); +/** + * Convert an Unicode code point to UTF8 sequence. + * + * \param Source a Unicode code point. + * \param [in,out] ResultPtr pointer to the output buffer, needs to be at least + * \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes. On success \c ResultPtr is + * updated one past end of the converted sequence. + * + * \returns true on success. 
+ */ +bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr); + } #endif diff --git a/lib/Basic/ConvertUTF.c b/lib/Basic/ConvertUTF.c index e1970039e1..4793b251f6 100644 --- a/lib/Basic/ConvertUTF.c +++ b/lib/Basic/ConvertUTF.c @@ -285,6 +285,7 @@ ConversionResult ConvertUTF16toUTF8 ( *targetStart = target; return result; } +#endif /* --------------------------------------------------------------------- */ @@ -339,8 +340,6 @@ ConversionResult ConvertUTF32toUTF8 ( return result; } -#endif - /* --------------------------------------------------------------------- */ /* diff --git a/lib/Basic/ConvertUTFWrapper.cpp b/lib/Basic/ConvertUTFWrapper.cpp index 42b4f58079..a1b3f7fd9d 100644 --- a/lib/Basic/ConvertUTFWrapper.cpp +++ b/lib/Basic/ConvertUTFWrapper.cpp @@ -51,4 +51,20 @@ bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, return result == conversionOK; } +bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) { + const UTF32 *SourceStart = &Source; + const UTF32 *SourceEnd = SourceStart + 1; + UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr); + UTF8 *TargetEnd = TargetStart + 4; + ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd, + &TargetStart, TargetEnd, + strictConversion); + if (CR != conversionOK) + return false; + + ResultPtr = reinterpret_cast<char *>(TargetStart); + return true; } + +} // end namespace clang +