From: Nico Weber <nicolasweber@gmx.de>
Date: Sun, 21 Apr 2019 16:58:25 +0000 (+0000)
Subject: llvm-undname: Fix stack overflow on almost-valid
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f6b23dfdd44b577d33e440c98b49f7d3b4182daa;p=llvm

llvm-undname: Fix stack overflow on almost-valid

If an unsigned with all 4 bytes non-0 was passed to outputHex(), there
were two off-by-ones in it:

- Both MaxPos and Pos left space for the final \0, which left the buffer
  one byte too small. Set MaxPos to 16 instead of 15 to fix.

- The `assert(Pos >= 0);` was after a `Pos--`, move it up one line.

Since valid Unicode codepoints are <= 0x10ffff, this could never really
happen in practice.

Found by oss-fuzz.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358856 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp
index b421f2a7f93..01a742a874e 100644
--- a/lib/Demangle/MicrosoftDemangle.cpp
+++ b/lib/Demangle/MicrosoftDemangle.cpp
@@ -1071,17 +1071,17 @@ static void outputHex(OutputStream &OS, unsigned C) {
   char TempBuffer[17];
 
   ::memset(TempBuffer, 0, sizeof(TempBuffer));
-  constexpr int MaxPos = 15;
+  constexpr int MaxPos = sizeof(TempBuffer) - 1;
 
-  int Pos = MaxPos - 1;
+  int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0.
   while (C != 0) {
     for (int I = 0; I < 2; ++I) {
       writeHexDigit(&TempBuffer[Pos--], C % 16);
       C /= 16;
     }
     TempBuffer[Pos--] = 'x';
-    TempBuffer[Pos--] = '\\';
     assert(Pos >= 0);
+    TempBuffer[Pos--] = '\\';
   }
   OS << StringView(&TempBuffer[Pos + 1]);
 }
diff --git a/test/Demangle/ms-string-literals.test b/test/Demangle/ms-string-literals.test
index 2fe3384fe74..7ba6b48e6ae 100644
--- a/test/Demangle/ms-string-literals.test
+++ b/test/Demangle/ms-string-literals.test
@@ -781,3 +781,13 @@
 
 ??_C@_0CC@MBPKDIAM@a?$AA?$AA?$AAb?$AA?$AA?$AAc?$AA?$AA?$AAd?$AA?$AA?$AAe?$AA?$AA?$AAf?$AA?$AA?$AAg?$AA?$AA?$AAh?$AA?$AA?$AA@
 ; CHECK: u"a\0b\0c\0d\0e\0f\0g\0h\0"...
+
+; This is technically not a valid u32 string since the character in it is not
+; <= 0x10FFFF like unicode demands. (Also, the crc doesn't match the contents.)
+; It's here because this input used to cause a stack overflow in outputHex().
+
+; FIXME: The demangler currently writes four \x codes for a single U string
+; character. That's incorrect since that would mangle to four characters.
+
+??_C@_07LJGFEJEB@D3?$CC?$BB?$AA?$AA?$AA?$AA@)
+; CHECK: U"\x11\x22\x33\x44"