From: K.Kosako Date: Tue, 25 Sep 2018 06:05:25 +0000 (+0900) Subject: add map_offset member into re_pattern_buffer X-Git-Tag: v6.9.1~18^2~52 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=12de33cf2fae9416a739504816b22193d8150b77;p=onig add map_offset member into re_pattern_buffer --- diff --git a/src/regcomp.c b/src/regcomp.c index fc4b3d5..9b8150b 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -4555,28 +4555,48 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) #ifdef USE_SUNDAY_QUICK_SEARCH_ALGORITHM static int set_sunday_quick_search_skip_table(UChar* s, UChar* end, OnigEncoding enc, - UChar skip[]) + UChar skip[], int* roffset) { - int i, len, emin; + int i, len, offset; - emin = ONIGENC_MBC_MINLEN(enc); + offset = 1; + if (ONIGENC_MBC_MINLEN(enc) > 1) { + UChar* p = s; + while (1) { + len = enclen(enc, p); + if (p + len >= end) { + UChar* q = p + (ONIGENC_MBC_MINLEN(enc) - 1); + while (q > p) { + if (*q != '\0') { + offset = q - p + 1; + break; + } + q--; + } + break; + } + p += len; + } + } len = (int )(end - s); - if (len + emin >= ONIG_CHAR_TABLE_SIZE) + if (len + offset >= ONIG_CHAR_TABLE_SIZE) return ONIGERR_PARSER_BUG; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + emin); + *roffset = offset; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + offset); for (i = 0; i < len; i++) - skip[s[i]] = len - i + (emin - 1); + skip[s[i]] = len - i + (offset - 1); return 0; } #else /* set skip map for Boyer-Moore-Horspool search */ static int -set_bmh_search_skip_table(UChar* s, UChar* end, - OnigEncoding enc ARG_UNUSED, UChar skip[]) +set_bmh_search_skip_table(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + UChar skip[]) { int i, len; @@ -5585,7 +5605,8 @@ set_optimize_exact(regex_t* reg, OptExact* e) if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { #ifdef USE_SUNDAY_QUICK_SEARCH_ALGORITHM r = set_sunday_quick_search_skip_table(reg->exact, reg->exact_end, - reg->enc, reg->map); + reg->enc, reg->map, + &(reg->map_offset)); #else r = set_bmh_search_skip_table(reg->exact, reg->exact_end, reg->enc, reg->map); @@ -5706,6 +5727,7 @@ clear_optimize_info(regex_t* reg) reg->anchor_dmax = 0; reg->sub_anchor = 0; reg->exact_end = (UChar* )NULL; + reg->map_offset = 0; reg->threshold_len = 0; if (IS_NOT_NULL(reg->exact)) { xfree(reg->exact); diff --git a/src/regexec.c b/src/regexec.c index 2ab410e..61ca413 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -4123,7 +4123,7 @@ sunday_quick_search_step_forward(regex_t* reg, const UChar *s, *se, *t, *p, *end; const UChar *tail; int skip, tlen1; - int enc_minlen; + int map_offset; #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, @@ -4136,7 +4136,7 @@ sunday_quick_search_step_forward(regex_t* reg, if (end + tlen1 > text_end) end = text_end - tlen1; - enc_minlen = ONIGENC_MBC_MINLEN(reg->enc); + map_offset = reg->map_offset; s = text; while (s < end) { @@ -4146,8 +4146,8 @@ sunday_quick_search_step_forward(regex_t* reg, if (t == target) return (UChar* )s; p--; t--; } - if (se + enc_minlen >= text_end) break; - skip = reg->map[*(se + enc_minlen)]; + if (se + map_offset >= text_end) break; + skip = reg->map[*(se + map_offset)]; t = s; do { s += enclen(reg->enc, s); @@ -4164,13 +4164,13 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end, { const UChar *s, *t, *p, *end; const UChar *tail; - int enc_minlen; + int map_offset; end = text_range + (target_end - target); if (end > text_end) end = text_end; - enc_minlen = ONIGENC_MBC_MINLEN(reg->enc); + map_offset = reg->map_offset; tail = target_end - 1; s = text + (tail - target); @@ -4181,8 +4181,8 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end, if (t == target) return (UChar* )p; p--; t--; } - if (s + enc_minlen >= text_end) break; - s += reg->map[*(s + enc_minlen)]; + if (s + map_offset >= text_end) break; + s += reg->map[*(s + map_offset)]; } return (UChar* )NULL; diff --git a/src/regint.h b/src/regint.h index 826e165..844de3c 100644 --- a/src/regint.h +++ b/src/regint.h @@ -294,14 +294,15 @@ struct re_pattern_buffer { int optimize; /* optimize flag */ int threshold_len; /* search str-length for apply optimize */ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ - OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */ - OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */ int sub_anchor; /* start-anchor for exact or map */ unsigned char *exact; unsigned char *exact_end; - unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - OnigLen dmin; /* min-distance of exact or map */ - OnigLen dmax; /* max-distance of exact or map */ + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BMH skip or char-map */ + int map_offset; + OnigLen dmin; /* min-distance of exact or map */ + OnigLen dmax; /* max-distance of exact or map */ /* regex_t link chain */ struct re_pattern_buffer* chain; /* escape compile-conflict */