granicus.if.org Git - llvm/commitdiff
[YAML] Fix UTF-8 handling
author: Francis Visoiu Mistrih <francisvm@yahoo.com>
Thu, 21 Dec 2017 17:14:09 +0000 (17:14 +0000)
committer: Francis Visoiu Mistrih <francisvm@yahoo.com>
Thu, 21 Dec 2017 17:14:09 +0000 (17:14 +0000)
Previous YAML quoting patches broke UTF-8 printing in YAML: see https://reviews.llvm.org/D41290#961801.

Differential Revision: https://reviews.llvm.org/D41490

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321283 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Support/YAMLTraits.cpp
unittests/Support/YAMLIOTest.cpp

index 05ca40f03018a9bf23ba35fcb198d01e3f42c69b..f8a80ba87873f6d2898d6b3b21390b9fe798d26c 100644 (file)
@@ -657,7 +657,12 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
       }
       i = j + 1;
     } else if (MustQuote == QuotingType::Double &&
-               !sys::unicode::isPrintable(S[j])) {
+               !sys::unicode::isPrintable(S[j]) && (S[j] & 0x80) == 0) {
+      // If we're double quoting non-printable characters, we prefer printing
+      // them as "\x" + their hex representation. Note that special casing is
+      // needed for UTF-8, where a byte may be part of a UTF-8 sequence and
+      // appear as non-printable, in which case we want to print the correct
+      // unicode character and not its hex representation.
       output(StringRef(&Base[i], j - i)); // "flush"
       output(StringLiteral("\\x"));
 
index 9caff85a59639c37a9aebe0dcb1272ea2c744146..650b02cdea9e824353f91bf56df35074e91deccc 100644 (file)
@@ -2541,3 +2541,31 @@ TEST(YAMLIO, TestEscapedSingleQuoteInsideSingleQuote) {
   ostr.flush();
   EXPECT_EQ("'abc''fdf'", out);
 }
+
+TEST(YAMLIO, TestEscapedUTF8SingleQuoteInsideDoubleQuote) { // regression test: UTF-8 text that also contains single quotes
+  std::string Id = "parameter 'параметр' is unused"; // input mixes ASCII, single quotes and multi-byte Cyrillic
+
+  std::string out; // receives everything written through ostr
+  llvm::raw_string_ostream ostr(out);
+  Output xout(ostr, nullptr, 0);
+
+  llvm::yaml::EmptyContext Ctx;
+  yamlize(xout, Id, true, Ctx); // serialize Id through the YAML Output object
+
+  ostr.flush(); // flush before `out` is compared
+  EXPECT_EQ("\"parameter 'параметр' is unused\"", out); // expected: double-quoted, Cyrillic bytes verbatim (no hex escapes), single quotes unchanged
+}
+
+TEST(YAMLIO, TestEscapedUTF8) { // regression test: UTF-8 text surrounded by comment-like punctuation
+  std::string Id = "/*параметр*/"; // multi-byte Cyrillic between "/*" and "*/"
+
+  std::string out; // receives everything written through ostr
+  llvm::raw_string_ostream ostr(out);
+  Output xout(ostr, nullptr, 0);
+
+  llvm::yaml::EmptyContext Ctx;
+  yamlize(xout, Id, true, Ctx); // serialize Id through the YAML Output object
+
+  ostr.flush(); // flush before `out` is compared
+  EXPECT_EQ("\"/*параметр*/\"", out); // expected: double-quoted, Cyrillic bytes verbatim (no hex escapes)
+}