granicus.if.org Git - icu/commitdiff
ICU-11369 Regex, fix incorrect optimization of patterns with a zero length quantifier {0}
author Andy Heninger <andy.heninger@gmail.com>
Wed, 12 Nov 2014 23:42:14 +0000 (23:42 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Wed, 12 Nov 2014 23:42:14 +0000 (23:42 +0000)
X-SVN-Rev: 36727

icu4c/source/i18n/regexcmp.cpp
icu4c/source/i18n/regexcmp.h
icu4c/source/test/testdata/regextst.txt

index 0b78a9a40add289c73afc4b0e9ce5513e6513393..26cbc75589a90ed832b6174fd667576d408a8251 100644 (file)
@@ -2339,7 +2339,15 @@ UBool RegexCompile::compileInlineInterval() {
     int32_t   topOfBlock = blockTopLoc(FALSE);
     if (fIntervalUpper == 0) {
         // Pathological case.  Attempt no matches, as if the block doesn't exist.
+        // Discard the generated code for the block.
+        // If the block included parens, discard the info pertaining to them as well.
         fRXPat->fCompiledPat->setSize(topOfBlock);
+        if (fMatchOpenParen >= topOfBlock) {
+            fMatchOpenParen = -1;
+        }
+        if (fMatchCloseParen >= topOfBlock) {
+            fMatchCloseParen = -1;
+        }
         return TRUE;
     }
 
index debdf45833ae6a2a1fa4aa547dca24e54d15626a..3e69434ea54d20bebc72743c785b150954d4a2d5 100644 (file)
@@ -187,7 +187,9 @@ private:
     int32_t                       fMatchOpenParen;   // The position in the compiled pattern
                                                      //   of the slot reserved for a state save
                                                      //   at the start of the most recently processed
-                                                     //   parenthesized block.
+                                                     //   parenthesized block. When a close paren is processed,
+                                                     //   updated to the location of the corresponding open paren.
+
     int32_t                       fMatchCloseParen;  // The position in the pattern of the first
                                                      //   location after the most recently processed
                                                      //   parenthesized block.
index 1bf7d6e40ae8d47131e2643eefb59fab693d0cfd..d642e8b9c8b3d33eff50902f46fd97b0af25f955 100644 (file)
 "A|B|\U00012345"                "hello <0>\U00012345</0>"
 "A|B|\U00010000"                "hello \ud800"
 
+# Bug 11369
+#   Incorrect optimization of patterns with a zero length quantifier {0}
+
+"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)"   "AAAAABBBBBCCCCCDDDDEEEEE"
+"(|b)ab(c)"                     "<0><1></1>ab<2>c</2></0>"
+"(|b){0}a{3}(D*)"               "<0>aaa<2></2></0>"
+"(|b){0,1}a{3}(D*)"             "<0><1></1>aaa<2></2></0>"
+"((|b){0})a{3}(D*)"             "<0><1></1>aaa<3></3></0>"
+
 # Bug 11370
 #   Max match length computation of look-behind expression gives result that is too big to fit in the
 #   24 bit operand portion of the compiled code. Expressions should fail to compile
 "(?<!(0123456789a){10000000})x"         E  "no match"
 "(?<!\\ubeaf(\\ubeaf{11000}){11000})"   E  "no match"
 
+
 #  Random debugging, Temporary
 #