This can also be used instead of the WindowsSupport.h ConvertUTF8ToUTF16
helpers, but that will require massaging some character types. The
Windows support routines want wchar_t output, but wchar_t is often 32
bits on non-Windows OSs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227122 91177308-0d34-0410-b5e6-96231b3b80d8
*/
bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
+/**
+ * Converts a UTF-8 string into a UTF-16 string with native endianness.
+ *
+ * \param SrcUTF8 the UTF-8 encoded input. An empty input is accepted and
+ * produces an empty result.
+ * \param [out] DstUTF16 receives the resulting UTF-16 code units. It must be
+ * empty on entry (this is asserted) and is left empty if the conversion
+ * fails.
+ *
+ * \returns true on success
+ */
+bool convertUTF8ToUTF16String(StringRef SrcUTF8,
+ SmallVectorImpl<UTF16> &DstUTF16);
+
} /* end namespace llvm */
#endif
return true;
}
+/// Transcodes \p SrcUTF8 into UTF-16 code units stored in \p DstUTF16.
+///
+/// \p DstUTF16 is expected to be empty on entry. Returns true on success
+/// (including for empty input); on a conversion failure the destination is
+/// cleared and false is returned.
+bool convertUTF8ToUTF16String(StringRef SrcUTF8,
+                              SmallVectorImpl<UTF16> &DstUTF16) {
+  assert(DstUTF16.empty());
+
+  // An empty source needs no work; returning here also keeps us from taking
+  // the address of element 0 of an empty vector below.
+  if (SrcUTF8.empty())
+    return true;
+
+  // A UTF-16 encoding never needs more code units than the UTF-8 encoding of
+  // the same text, so one output slot per input byte is always sufficient.
+  DstUTF16.resize(SrcUTF8.size());
+
+  const UTF8 *SrcCursor = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());
+  const UTF8 *const SrcLimit = reinterpret_cast<const UTF8 *>(SrcUTF8.end());
+  UTF16 *DstCursor = &DstUTF16[0];
+  UTF16 *const DstLimit = DstCursor + DstUTF16.size();
+
+  const ConversionResult Status = ConvertUTF8toUTF16(
+      &SrcCursor, SrcLimit, &DstCursor, DstLimit, strictConversion);
+  assert(Status != targetExhausted);
+
+  if (Status == conversionOK) {
+    // Trim the buffer down to the code units that were actually written.
+    DstUTF16.resize(DstCursor - &DstUTF16[0]);
+    return true;
+  }
+
+  // Malformed or truncated input: do not hand back a partial result.
+  DstUTF16.clear();
+  return false;
+}
+
} // end namespace llvm
//===----------------------------------------------------------------------===//
#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Format.h"
#include "gtest/gtest.h"
#include <string>
#include <utility>
EXPECT_EQ(Expected, Result);
}
+// Checks that a UTF-8 string mixing three-byte and one-byte sequences is
+// converted into the expected three UTF-16 code units.
+TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
+  // Src is the look of disapproval.
+  static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
+  // Exclude the implicit trailing NUL of the literal from the input.
+  StringRef Ref(Src, sizeof(Src) - 1);
+  SmallVector<UTF16, 5> Result;
+  bool Success = convertUTF8ToUTF16String(Ref, Result);
+  EXPECT_TRUE(Success);
+  static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
+  // Use an unsigned literal: Result.size() is unsigned, and comparing it with
+  // a signed int inside ASSERT_EQ triggers -Wsign-compare.
+  ASSERT_EQ(3u, Result.size());
+  for (unsigned I = 0, E = 3; I != E; ++I)
+    EXPECT_EQ(Expected[I], Result[I]);
+}
+
TEST(ConvertUTFTest, OddLengthInput) {
std::string Result;
bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result);