From: K.Kosako Date: Sat, 22 Sep 2018 10:52:57 +0000 (+0900) Subject: use Sunday quick search algorithm instead of Boyer-Moore-Horspool X-Git-Tag: v6.9.1~18^2~60 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d5d888e8f47e64788f004cf871522a0724096294;p=onig use Sunday quick search algorithm instead of Boyer-Moore-Horspool --- diff --git a/src/regcomp.c b/src/regcomp.c index fc2b48f..08dc3d6 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -4552,6 +4552,24 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) return r; } +#ifdef USE_SUNDAY_QUICK_SEARCH_ALGORITHM +static int +set_bmh_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, UChar skip[]) +{ + int i, len; + + len = (int )(end - s); + if (len + 1 >= ONIG_CHAR_TABLE_SIZE) + return ONIGERR_PARSER_BUG; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1); + + for (i = 0; i < len; i++) + skip[s[i]] = len - i; + + return 0; +} +#else /* set skip map for Boyer-Moore-Horspool search */ static int set_bmh_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, UChar skip[]) @@ -4571,6 +4589,8 @@ set_bmh_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, UChar skip[]) return ONIGERR_PARSER_BUG; } } +#endif /* USE_SUNDAY_QUICK_SEARCH_ALGORITHM */ + #define OPT_EXACT_MAXLEN 24 diff --git a/src/regexec.c b/src/regexec.c index ce60cf8..48886bf 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -4112,6 +4112,79 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, return (UChar* )NULL; } +#ifdef USE_SUNDAY_QUICK_SEARCH_ALGORITHM + +static UChar* +bmh_search_forward(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) +{ + const UChar *s, *se, *t, *p, *end; + const UChar *tail; + int skip, tlen1; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "bmh_search_forward: text: %p, text_end: %p, text_range: %p\n", + text, text_end, text_range); +#endif + + tail = target_end - 1; + 
tlen1 = (int )(tail - target); + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; + + s = text; + + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; + } + if (se + 1 >= text_end) break; + skip = reg->map[*(se + 1)]; + t = s; + do { + s += enclen(reg->enc, s); + } while ((s - t) < skip && s < end); + } + + return (UChar* )NULL; +} + +static UChar* +bmh_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) +{ + const UChar *s, *t, *p, *end; + const UChar *tail; + + end = text_range + (target_end - target); + if (end > text_end) + end = text_end; + + tail = target_end - 1; + s = text + (tail - target); + + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; + } + if (s + 1 >= text_end) break; + s += reg->map[*(s + 1)]; + } + + return (UChar* )NULL; +} + +#else + static UChar* bmh_search_forward(regex_t* reg, const UChar* target, const UChar* target_end, const UChar* text, const UChar* text_end, @@ -4178,6 +4251,7 @@ bmh_search(regex_t* reg, const UChar* target, const UChar* target_end, return (UChar* )NULL; } +#endif /* USE_SUNDAY_QUICK_SEARCH_ALGORITHM */ static UChar* map_search(OnigEncoding enc, UChar map[], diff --git a/src/regint.h b/src/regint.h index 1782dbe..50a9e36 100644 --- a/src/regint.h +++ b/src/regint.h @@ -62,6 +62,7 @@ #define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +#define USE_SUNDAY_QUICK_SEARCH_ALGORITHM #define USE_RETRY_LIMIT_IN_MATCH