granicus.if.org Git - icu/commitdiff
ICU-20359 Fix stack overflow in Regex Pattern Compile.
author: Andy Heninger <andy.heninger@gmail.com>
Sat, 2 Mar 2019 00:49:21 +0000 (16:49 -0800)
committer: Andy Heninger <andy.heninger@gmail.com>
Thu, 7 Mar 2019 18:31:30 +0000 (10:31 -0800)
icu4c/source/i18n/regexcmp.cpp
icu4c/source/test/intltest/regextst.cpp
icu4c/source/test/intltest/regextst.h

index a4c12804237ab9bff6a7627c33fd9658913f451c..2d14aa83703b43ca10293520edc54788da8c6ffc 100644 (file)
@@ -4010,7 +4010,7 @@ UChar32  RegexCompile::peekCharLL() {
 //
 //------------------------------------------------------------------------------
 void RegexCompile::nextChar(RegexPatternChar &c) {
-
+  tailRecursion:
     fScanIndex = UTEXT_GETNATIVEINDEX(fRXPat->fPattern);
     c.fChar    = nextCharLL();
     c.fQuoted  = FALSE;
@@ -4021,7 +4021,9 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
             c.fChar == (UChar32)-1) {
             fQuoteMode = FALSE;  //  Exit quote mode,
             nextCharLL();        // discard the E
-            nextChar(c);         // recurse to get the real next char
+            // nextChar(c);      // recurse to get the real next char
+            goto tailRecursion;  // Note: fuzz testing produced testcases that
+                                 //       resulted in stack overflow here.
         }
     }
     else if (fInBackslashQuote) {
@@ -4139,8 +4141,10 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
             else if (peekCharLL() == chQ) {
                 //  "\Q"  enter quote mode, which will continue until "\E"
                 fQuoteMode = TRUE;
-                nextCharLL();       // discard the 'Q'.
-                nextChar(c);        // recurse to get the real next char.
+                nextCharLL();        // discard the 'Q'.
+                // nextChar(c);      // recurse to get the real next char.
+                goto tailRecursion;  // Note: fuzz testing produced test cases that
+                                     //       resulted in stack overflow here.
             }
             else
             {
index f8833b103890e5849e0129cba51d79884e0310f0..b1d191cdcc71e97c958fe699bae58de7a76e6ccf 100644 (file)
@@ -104,6 +104,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
     TESTCASE_AUTO(TestBug12884);
     TESTCASE_AUTO(TestBug13631);
     TESTCASE_AUTO(TestBug13632);
+    TESTCASE_AUTO(TestBug20359);
     TESTCASE_AUTO_END;
 }
 
@@ -5851,4 +5852,30 @@ void RegexTest::TestBug13632() {
     uregex_close(re);
 }
 
+void RegexTest::TestBug20359() {
+    // ICU-20359: compiling a pattern with a huge number of adjacent \Q\E pairs (enter and
+    // exit literal quote mode) overflowed the stack. The parsing logic itself was correct;
+    // the fix makes RegexCompile::nextChar() loop instead of recursing.
+
+    UnicodeString pattern;
+    for (int i=0; i<50000; ++i) {
+        pattern += u"\\Q\\E";    // one empty quoted section per iteration
+    }
+    pattern += u"x";             // the single literal that follows all the empty quotes
+
+    UErrorCode status = U_ZERO_ERROR;
+    LocalURegularExpressionPointer re(uregex_open(pattern.getBuffer(), pattern.length(),
+                                       0, nullptr, &status));
+    assertSuccess(WHERE, status);
+
+    // Reaching this point means the compile did not crash. What follows is a small sanity
+    // check that the pattern still works, i.e. all the \Q\E\Q\E... caused no other problems.
+
+    uregex_setText(re.getAlias(), u"abcxyz", -1, &status);
+    assertSuccess(WHERE, status);
+    assertTrue(WHERE, uregex_find(re.getAlias(), 0, &status));
+    assertEquals(WHERE, 3, uregex_start(re.getAlias(), 0, &status));   // 'x' is at index 3
+    assertSuccess(WHERE, status);
+}
+
 #endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
index cfa62d70384ba9cc731d517314cd1fd60a54c473..58e9acb22c66b3c872c11055e94c3e751fe85d16 100644 (file)
@@ -59,6 +59,7 @@ public:
     virtual void TestBug12884();
     virtual void TestBug13631();
     virtual void TestBug13632();
+    virtual void TestBug20359();
 
     // The following functions are internal to the regexp tests.
     virtual void assertUText(const char *expected, UText *actual, const char *file, int line);