From: Andy Heninger Date: Wed, 12 Nov 2014 23:42:14 +0000 (+0000) Subject: ICU-11369 Regex, fix incorrect optimization of patterns with a zero length quantifier {0} X-Git-Tag: milestone-59-0-1~1448 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f5d14979c61b605ffe15f21ab1ecdaf7d606aeee;p=icu ICU-11369 Regex, fix incorrect optimization of patterns with a zero length quantifier {0} X-SVN-Rev: 36727 --- diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp index 0b78a9a40ad..26cbc75589a 100644 --- a/icu4c/source/i18n/regexcmp.cpp +++ b/icu4c/source/i18n/regexcmp.cpp @@ -2339,7 +2339,15 @@ UBool RegexCompile::compileInlineInterval() { int32_t topOfBlock = blockTopLoc(FALSE); if (fIntervalUpper == 0) { // Pathological case. Attempt no matches, as if the block doesn't exist. + // Discard the generated code for the block. + // If the block included parens, discard the info pertaining to them as well. fRXPat->fCompiledPat->setSize(topOfBlock); + if (fMatchOpenParen >= topOfBlock) { + fMatchOpenParen = -1; + } + if (fMatchCloseParen >= topOfBlock) { + fMatchCloseParen = -1; + } return TRUE; } diff --git a/icu4c/source/i18n/regexcmp.h b/icu4c/source/i18n/regexcmp.h index debdf45833a..3e69434ea54 100644 --- a/icu4c/source/i18n/regexcmp.h +++ b/icu4c/source/i18n/regexcmp.h @@ -187,7 +187,9 @@ private: int32_t fMatchOpenParen; // The position in the compiled pattern // of the slot reserved for a state save // at the start of the most recently processed - // parenthesized block. + // parenthesized block. Updated when processing + // a close to the location for the corresponding open. + int32_t fMatchCloseParen; // The position in the pattern of the first // location after the most recently processed // parenthesized block. diff --git a/icu4c/source/test/testdata/regextst.txt b/icu4c/source/test/testdata/regextst.txt index 1bf7d6e40ae..d642e8b9c8b 100644 --- a/icu4c/source/test/testdata/regextst.txt +++ b/icu4c/source/test/testdata/regextst.txt @@ -1201,6 +1201,15 @@ "A|B|\U00012345" "hello <0>\U00012345" "A|B|\U00010000" "hello \ud800" +# Bug 11369 +# Incorrect optimization of patterns with a zero length quantifier {0} + +"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE" +"(|b)ab(c)" "<0><1>ab<2>c" +"(|b){0}a{3}(D*)" "<0>aaa<2>" +"(|b){0,1}a{3}(D*)" "<0><1>aaa<2>" +"((|b){0})a{3}(D*)" "<0><1>aaa<3>" + # Bug 11370 # Max match length computation of look-behind expression gives result that is too big to fit in the # in the 24 bit operand portion of the compiled code. Expressions should fail to compile @@ -1209,6 +1218,7 @@ "(?