From e559b30309d7d76ceda0246e2802a15c2a01004f Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Fri, 1 Mar 2019 16:49:21 -0800 Subject: [PATCH] ICU-20359 Fix stack overflow in Regex Pattern Compile. --- icu4c/source/i18n/regexcmp.cpp | 12 +++++++---- icu4c/source/test/intltest/regextst.cpp | 27 +++++++++++++++++++++++++ icu4c/source/test/intltest/regextst.h | 1 + 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp index a4c12804237..2d14aa83703 100644 --- a/icu4c/source/i18n/regexcmp.cpp +++ b/icu4c/source/i18n/regexcmp.cpp @@ -4010,7 +4010,7 @@ UChar32 RegexCompile::peekCharLL() { // //------------------------------------------------------------------------------ void RegexCompile::nextChar(RegexPatternChar &c) { - + tailRecursion: fScanIndex = UTEXT_GETNATIVEINDEX(fRXPat->fPattern); c.fChar = nextCharLL(); c.fQuoted = FALSE; @@ -4021,7 +4021,9 @@ void RegexCompile::nextChar(RegexPatternChar &c) { c.fChar == (UChar32)-1) { fQuoteMode = FALSE; // Exit quote mode, nextCharLL(); // discard the E - nextChar(c); // recurse to get the real next char + // nextChar(c); // recurse to get the real next char + goto tailRecursion; // Note: fuzz testing produced testcases that + // resulted in stack overflow here. } } else if (fInBackslashQuote) { @@ -4139,8 +4141,10 @@ void RegexCompile::nextChar(RegexPatternChar &c) { else if (peekCharLL() == chQ) { // "\Q" enter quote mode, which will continue until "\E" fQuoteMode = TRUE; - nextCharLL(); // discard the 'Q'. - nextChar(c); // recurse to get the real next char. + nextCharLL(); // discard the 'Q'. + // nextChar(c); // recurse to get the real next char. + goto tailRecursion; // Note: fuzz testing produced test cases that + // resulted in stack overflow here. } else { diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp index f8833b10389..b1d191cdcc7 100644 --- a/icu4c/source/test/intltest/regextst.cpp +++ b/icu4c/source/test/intltest/regextst.cpp @@ -104,6 +104,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch TESTCASE_AUTO(TestBug12884); TESTCASE_AUTO(TestBug13631); TESTCASE_AUTO(TestBug13632); + TESTCASE_AUTO(TestBug20359); TESTCASE_AUTO_END; } @@ -5851,4 +5852,30 @@ void RegexTest::TestBug13632() { uregex_close(re); } +void RegexTest::TestBug20359() { + // The bug was stack overflow while parsing a pattern with a huge number of adjacent \Q\E + // pairs. (Enter and exit pattern literal quote mode). Logic was correct. + // Changed implementation to loop instead of recursing. + + UnicodeString pattern; + for (int i=0; i<50000; ++i) { + pattern += u"\\Q\\E"; + } + pattern += u"x"; + + UErrorCode status = U_ZERO_ERROR; + LocalURegularExpressionPointer re(uregex_open(pattern.getBuffer(), pattern.length(), + 0, nullptr, &status)); + assertSuccess(WHERE, status); + + // We have passed the point where the bug crashed. The following is a small sanity + // check that the pattern works, that all the \Q\E\Q\E... didn't cause other problems. + + uregex_setText(re.getAlias(), u"abcxyz", -1, &status); + assertSuccess(WHERE, status); + assertTrue(WHERE, uregex_find(re.getAlias(), 0, &status)); + assertEquals(WHERE, 3, uregex_start(re.getAlias(), 0, &status)); + assertSuccess(WHERE, status); +} + #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ diff --git a/icu4c/source/test/intltest/regextst.h b/icu4c/source/test/intltest/regextst.h index cfa62d70384..58e9acb22c6 100644 --- a/icu4c/source/test/intltest/regextst.h +++ b/icu4c/source/test/intltest/regextst.h @@ -59,6 +59,7 @@ public: virtual void TestBug12884(); virtual void TestBug13631(); virtual void TestBug13632(); + virtual void TestBug20359(); // The following functions are internal to the regexp tests. virtual void assertUText(const char *expected, UText *actual, const char *file, int line); -- 2.40.0