From 2ed857ce6c3079a4a87475bed59216457757f167 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Fri, 1 Mar 2019 22:40:23 +0000 Subject: [PATCH] libre2c: updated benchmark. --- re2c/lib/bench.cc | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/re2c/lib/bench.cc b/re2c/lib/bench.cc index df96bda5..a13b768c 100644 --- a/re2c/lib/bench.cc +++ b/re2c/lib/bench.cc @@ -17,7 +17,7 @@ #endif -static const int NO_RE2 = 1 << 31; +static const int XREG_RE2 = 1 << 31; static inline void show(const char *prefix1, const char *prefix2 , clock_t t1, clock_t t2, bool ok) @@ -32,9 +32,10 @@ static inline void show(const char *prefix1, const char *prefix2 } static void bench_re2c(const char *regexp, const char *string, size_t ntimes - , int flags, int mask, const char *prefix) + , int flags, int mask, int need, const char *prefix) { - if (flags & mask) { + if ((flags & mask) + || (need != 0 && !(flags & need))) { return; } @@ -65,7 +66,7 @@ static void bench_re2c(const char *regexp, const char *string, size_t ntimes static void bench_re2(const char *regexp, const char *string, size_t ntimes , int mask, const char *prefix) { - if (mask & NO_RE2) { + if (mask & XREG_RE2) { return; } @@ -101,17 +102,17 @@ static void bench_re2(const char *regexp, const char *string, size_t ntimes } #endif -static void bench(const char *r, const char *s, size_t n, int mask) +static void bench(const char *r, const char *s, size_t n, int mask, int need) { fprintf(stderr, "\nr: %.*s..., s: %.*s..., n: %lu\n", 30, r, 30, s, n); -// bench_re2c(r, s, n, 0, mask, "re2c-tdfa"); - bench_re2c(r, s, n, REG_NFA, mask, "re2c-tnfa-posix-gor1"); - bench_re2c(r, s, n, REG_NFA | REG_GTOP, mask, "re2c-tnfa-posix-gtop"); - bench_re2c(r, s, n, REG_NFA | REG_SLOWPREC, mask, "re2c-tnfa-posix-gor1-slow"); - bench_re2c(r, s, n, REG_NFA | REG_LEFTMOST, mask, "re2c-tnfa-leftmost"); - bench_re2c(r, s, n, REG_NFA | REG_TRIE, mask, "re2c-tnfa-posix-gtop-trie"); - bench_re2c(r, s, n, REG_NFA | REG_TRIE | REG_LEFTMOST, mask, "re2c-tnfa-leftmost-trie"); + bench_re2c(r, s, n, 0, mask, need, "re2c-tdfa"); + bench_re2c(r, s, n, REG_NFA, mask, need, "re2c-tnfa-posix-gor1"); + bench_re2c(r, s, n, REG_NFA | REG_GTOP, mask, need, "re2c-tnfa-posix-gtop"); + bench_re2c(r, s, n, REG_NFA | REG_SLOWPREC, mask, need, "re2c-tnfa-posix-gor1-slow"); + bench_re2c(r, s, n, REG_NFA | REG_LEFTMOST, mask, need, "re2c-tnfa-leftmost"); + bench_re2c(r, s, n, REG_NFA | REG_TRIE, mask, need, "re2c-tnfa-posix-gtop-trie"); + bench_re2c(r, s, n, REG_NFA | REG_TRIE | REG_LEFTMOST, mask, need, "re2c-tnfa-leftmost-trie"); #ifdef HAVE_RE2_RE2_H bench_re2(r, s, n, mask, "re2"); @@ -127,16 +128,16 @@ int main() regexp = "([a-zA-Z]([a-zA-Z0-9+.-])*)[:]([/][/](((([a-zA-Z0-9._~-]|[%][0-9a-fA-F]{2}|[!$&'()*+,;=:])*)[@])?(([[]((([0-9a-fA-F]{1,4}[:]){6}([0-9a-fA-F]{1,4}[:][0-9a-fA-F]{1,4}|(([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])))|[:][:]([0-9a-fA-F]{1,4}[:]){5}([0-9a-fA-F]{1,4}[:][0-9a-fA-F]{1,4}|(([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])))|([0-9a-fA-F]{1,4})?[:][:]([0-9a-fA-F]{1,4}[:]){4}([0-9a-fA-F]{1,4}[:][0-9a-fA-F]{1,4}|(([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])))|(([0-9a-fA-F]{1,4}[:]){0,1} [0-9a-fA-F]{1,4})?[:][:]([0-9a-fA-F]{1,4}[:]){3}([0-9a-fA-F]{1,4}[:][0-9a-fA-F]{1,4}|(([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])))|(([0-9a-fA-F]{1,4}[:]){0,2} [0-9a-fA-F]{1,4})?[:][:]([0-9a-fA-F]{1,4}[:]){2}([0-9a-fA-F]{1,4}[:][0-9a-fA-F]{1,4}|(([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])))|(([0-9a-fA-F]{1,4}[:]){0,3} [0-9a-fA-F]{1,4})?[:][:][0-9a-fA-F]{1,4}[:]([0-9a-fA-F]{1,4}[:][0-9a-fA-F]{1,4}|(([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])))|(([0-9a-fA-F]{1,4}[:]){0,4} [0-9a-fA-F]{1,4})?[:][:]([0-9a-fA-F]{1,4}[:][0-9a-fA-F]{1,4}|(([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])))|(([0-9a-fA-F]{1,4}[:]){0,5} [0-9a-fA-F]{1,4})?[:][:][0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}[:]){0,6} [0-9a-fA-F]{1,4})?[:][:])|([v][0-9a-fA-F]+[.]([a-zA-Z0-9._~-]|[!$&'()*+,;=]|[:])+))[\\]])|(([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35])[.]([0-9]|[\x31-\x39][0-9]|[1][0-9]{2}|[2][\x30-\x34][0-9]|[2][5][\x30-\x35]))|(([a-zA-Z0-9._~-]|[%][0-9a-fA-F]{2}|[!$&'()*+,;=])*))([:][0-9]*)?)(([/]([a-zA-Z0-9._~-]|[%][0-9a-fA-F]{2}|[!$&'()*+,;=:@])*)*)|([/](([a-zA-Z0-9._~-]|[%][0-9a-fA-F]{2}|[!$&'()*+,;=:@])+([/]([a-zA-Z0-9._~-]|[%][0-9a-fA-F]{2}|[!$&'()*+,;=:@])*)*)?)|(([a-zA-Z0-9._~-]|[%][0-9a-fA-F]{2}|[!$&'()*+,;=:@])+([/]([a-zA-Z0-9._~-]|[%][0-9a-fA-F]{2}|[!$&'()*+,;=:@])*)*)|([/]?))([?](([a-zA-Z0-9._~-]|[%][0-9a-fA-F]{2}|[!$&'()*+,;=:@])|[/?])*)?([#](([a-zA-Z0-9._~-]|[%][0-9a-fA-F]{2}|[!$&'()*+,;=:@])|[/?])*)?"; string = "http://240.147.163.34:2124/z/dibws/hkwfcdsvlghmmyd/ltiyxrfuam?fshxiix3zNzrBJNnuLDXnOruEmp4fX"; - bench(regexp, string, 1000, 0); + bench(regexp, string, 1000, 0, 0); regexp = "(a|aa)*"; memset(longstring, 'a', VERY_LONG); longstring[VERY_LONG - 1] = 0; - bench(regexp, longstring, 1, REG_TRIE); + bench(regexp, longstring, 1, REG_TRIE, 0); regexp = "((((a)*))*|(((((a)))*))+)*"; memset(longstring, 'a', VERY_LONG); - bench(regexp, longstring, 1, REG_TRIE); + bench(regexp, longstring, 1, REG_TRIE, 0); regexp = "(X|Xa|Xab|Xaba|abab|baba|bY|Y)*"; longstring[0] = 'X'; @@ -145,17 +146,14 @@ int main() longstring[i++] = 'b'; } longstring[VERY_LONG - 1] = 'Y'; - bench(regexp, longstring, 1, REG_TRIE); + bench(regexp, longstring, 1, REG_TRIE, 0); regexp = "(((..)|(.))((..)|(.))((..)|(.)))*"; memset(longstring, 'a', VERY_LONG); - bench(regexp, longstring, 1, REG_TRIE); + bench(regexp, longstring, 1, REG_TRIE, 0); regexp = "(((){0,100}){0,100}){0,100}"; - // regexp = "(){0,10000}"; - //regexp = "(((){0,10}){0,10}){0,10}"; - string = ""; - bench(regexp, string, 1, REG_LEFTMOST | NO_RE2); + bench(regexp, "", 1, REG_LEFTMOST | XREG_RE2, 0); // Pathological case for constant-memory POSIX algorithms that use naive // (worst-case cubic in the size of TNFA) algorithm for precedence matrix @@ -166,10 +164,13 @@ int main() // affected, but they consume memory proportional to the length of input, // and so are also not practical. regexp = "((a?){1,1000})*"; - bench(regexp, "aaaa", 1, 0); + bench(regexp, "aaaa", 1, 0, REG_NFA); + + regexp = "((a?){1,300})*"; + bench(regexp, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 4, 0, 0); regexp = "((((a?){1,10}){1,10}){1,10})*"; - bench(regexp, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1, 0); + bench(regexp, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1, 0, REG_NFA); delete[] longstring; return 0; -- 2.40.0