From: Ulya Trofimovich Date: Wed, 20 Feb 2019 17:00:47 +0000 (+0000) Subject: libre2c: found pathological case for constant-memory POSIX algorithms. X-Git-Tag: 1.2~141 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c0c951cb785d30a48d549df294ef4ea988245eb0;p=re2c libre2c: found pathological case for constant-memory POSIX algorithms. The regexp is ((a?){1,200})*, and the input string is just "a". Takes cubic time in the size of the counter. This is caused by quadratic-time computation of the precedence matrix on each step (the number of TNFA states in the closure approaches TNFA size), multiplied by the length of compared histories (which also approaches TNFA size). Trie-based algorithms are not affected, but they consume memory proportional to the length of the input string, and so are also not practical. --- diff --git a/re2c/lib/bench.cc b/re2c/lib/bench.cc index cf4510a3..c17b852e 100644 --- a/re2c/lib/bench.cc +++ b/re2c/lib/bench.cc @@ -124,10 +124,6 @@ int main() longstring[VERY_LONG - 1] = 0; bench(regexp, longstring, 1, true); - regexp = "((a?){1,10}(a)?)*"; - memset(longstring, 'a', VERY_LONG); - bench(regexp, longstring, 1, true); - regexp = "((((a)*))*|(((((a)))*))+)*"; memset(longstring, 'a', VERY_LONG); bench(regexp, longstring, 1, true); @@ -151,6 +147,16 @@ int main() string = ""; bench(regexp, string, 1, false); + // Pathological case for constant-memory POSIX algorithms (includes TDFA). + // Takes cubic time in the size of counter. This is caused by quadratic- + // time computation of precedence matrix on each step (the number of TNFA + // states in the closure approaches TNFA size), multiplied by the length + // of compared histories (which also approaches TNFA size). Trie-based + // algorithms are not affected, but they consume memory proportional to + // the length of the input string, and so are also not practical. + regexp = "((a?){1,200})*"; + bench(regexp, "a", 1, false); + delete[] longstring; return 0; }