From: Nico Weber Date: Sun, 21 Apr 2019 16:58:25 +0000 (+0000) Subject: llvm-undname: Fix stack overflow on almost-valid X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f6b23dfdd44b577d33e440c98b49f7d3b4182daa;p=llvm llvm-undname: Fix stack overflow on almost-valid If an unsigned with all 4 bytes non-0 was passed to outputHex(), there were two off-by-ones in it: - Both MaxPos and Pos left space for the final \0, which left the buffer one byte too small. Set MaxPos to 16 instead of 15 to fix. - The `assert(Pos >= 0);` was after a `Pos--`, move it up one line. Since valid Unicode codepoints are <= 0x10ffff, this could never really happen in practice. Found by oss-fuzz. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358856 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp index b421f2a7f93..01a742a874e 100644 --- a/lib/Demangle/MicrosoftDemangle.cpp +++ b/lib/Demangle/MicrosoftDemangle.cpp @@ -1071,17 +1071,17 @@ static void outputHex(OutputStream &OS, unsigned C) { char TempBuffer[17]; ::memset(TempBuffer, 0, sizeof(TempBuffer)); - constexpr int MaxPos = 15; + constexpr int MaxPos = sizeof(TempBuffer) - 1; - int Pos = MaxPos - 1; + int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0. while (C != 0) { for (int I = 0; I < 2; ++I) { writeHexDigit(&TempBuffer[Pos--], C % 16); C /= 16; } TempBuffer[Pos--] = 'x'; - TempBuffer[Pos--] = '\\'; assert(Pos >= 0); + TempBuffer[Pos--] = '\\'; } OS << StringView(&TempBuffer[Pos + 1]); } diff --git a/test/Demangle/ms-string-literals.test b/test/Demangle/ms-string-literals.test index 2fe3384fe74..7ba6b48e6ae 100644 --- a/test/Demangle/ms-string-literals.test +++ b/test/Demangle/ms-string-literals.test @@ -781,3 +781,13 @@ ??_C@_0CC@MBPKDIAM@a?$AA?$AA?$AAb?$AA?$AA?$AAc?$AA?$AA?$AAd?$AA?$AA?$AAe?$AA?$AA?$AAf?$AA?$AA?$AAg?$AA?$AA?$AAh?$AA?$AA?$AA@ ; CHECK: u"a\0b\0c\0d\0e\0f\0g\0h\0"... 
+ +; This is technically not a valid u32 string since the character in it is not +; <= 0x10FFFF like Unicode demands. (Also, the crc doesn't match the contents.) +; It's here because this input used to cause a stack overflow in outputHex(). + +; FIXME: The demangler currently writes four \x codes for a single U string +; character. That's incorrect since that would mangle to four characters. + +??_C@_07LJGFEJEB@D3?$CC?$BB?$AA?$AA?$AA?$AA@) +; CHECK: U"\x11\x22\x33\x44"