From 0d233db56ae48054f9b7d1acaff17e21ee0d3831 Mon Sep 17 00:00:00 2001 From: Seiji Masugata Date: Thu, 21 Sep 2006 16:37:20 +0000 Subject: [PATCH] Update the bundled Oniguruma library (used for multibyte regular expressions) to 4.4.4. This fixes several segmentation faults and a build failure on Mac OS X (Xcode 2.4 / gcc 4.0.1 problem). See #38452. --- ext/mbstring/config.m4 | 2 +- ext/mbstring/config.w32 | 4 ++ ext/mbstring/mbstring.c | 6 +-- ext/mbstring/oniguruma/HISTORY | 41 +++++++++++++++++++ ext/mbstring/oniguruma/config.h.in | 3 ++ ext/mbstring/oniguruma/index.html | 15 +++---- ext/mbstring/oniguruma/oniguruma.h | 4 +- ext/mbstring/oniguruma/regcomp.c | 34 ++++----------- ext/mbstring/oniguruma/regexec.c | 66 ++++-------------------------- ext/mbstring/oniguruma/regint.h | 12 ++---- 10 files changed, 80 insertions(+), 107 deletions(-) diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index 638f910a03..79c412b141 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -103,7 +103,7 @@ esac if test "$PHP_MBREGEX_BACKTRACK" != "no"; then - AC_DEFINE([HAVE_MBREGEX_BACKTRACK],1,[whether to check multibyte regex backtrack]) + AC_DEFINE([USE_COMBINATION_EXPLOSION_CHECK],1,[whether to check multibyte regex backtrack]) fi PHP_MBSTRING_ADD_CFLAG([-DNOT_RUBY]) diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index 1ab4ce9e97..9a084e62b3 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -3,6 +3,7 @@ ARG_ENABLE("mbstring", "multibyte string functions", "no"); ARG_ENABLE("mbregex", "multibyte regex support", "no"); +ARG_ENABLE("mbregex-backtrack", "check multibyte regex backtrack", "yes"); if (PHP_MBSTRING == "yes") { @@ -52,6 +53,9 @@ if (PHP_MBSTRING == "yes") { if (PHP_MBREGEX != "no") { AC_DEFINE('HAVE_STDARG_PROTOTYPES', 1, 'have stdarg.h'); AC_DEFINE('HAVE_MBREGEX', 1); + if (PHP_MBREGEX_BACKTRACK != "no") { + AC_DEFINE('USE_COMBINATION_EXPLOSION_CHECK', 1); + } ADD_SOURCES("ext/mbstring/oniguruma", "regcomp.c regerror.c \ regenc.c 
regexec.c reggnu.c regparse.c regposerr.c \ regext.c regsyntax.c regtrav.c regversion.c st.c", "mbstring"); diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index bc6209a5be..a8cdb4f0f0 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1050,11 +1050,11 @@ PHP_MINFO_FUNCTION(mbstring) sprintf(buf, "%d.%d.%d", ONIGURUMA_VERSION_MAJOR,ONIGURUMA_VERSION_MINOR,ONIGURUMA_VERSION_TEENY); php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf); -#ifdef HAVE_MBREGEX_BACKTRACK +#ifdef USE_COMBINATION_EXPLOSION_CHECK php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On"); -#else /* HAVE_MBREGEX_BACKTRACK */ +#else /* USE_COMBINATION_EXPLOSION_CHECK */ php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off"); -#endif /* HAVE_MBREGEX_BACKTRACK */ +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ } #endif php_info_print_table_end(); diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY index f844b07658..6c824a697b 100644 --- a/ext/mbstring/oniguruma/HISTORY +++ b/ext/mbstring/oniguruma/HISTORY @@ -1,5 +1,46 @@ History +2006/09/19: Version 4.4.4 + +2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/19: [impl] (thanks KOYAMA Tetsuji) + HAVE_STDARG_PROTOTYPES was not defined in Mac OS X + by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc... + +2006/09/15: Version 4.4.3 + +2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/15: [bug] (thanks Allan Odgaard) + out of range access in bm_search_notrev(). + (p < s) + +2006/09/08: Version 4.4.2 + +2006/09/08: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/08: [bug] (thanks K.Takata) + out of range access in bm_search_notrev(). +2006/09/04: [spec] (thanks K.Takata) + allow look-behind in negative look-behind. + ex. /(? and . 
*/ #undef TIME_WITH_SYS_TIME +/* Define if combination explosion check */ +#undef USE_COMBINATION_EXPLOSION_CHECK + /* Version number of package */ #undef VERSION diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html index f72d9f2294..fbf4fc0955 100755 --- a/ext/mbstring/oniguruma/index.html +++ b/ext/mbstring/oniguruma/index.html @@ -8,7 +8,7 @@

Oniguruma

-2006/08/21 (C) K.Kosako +2006/09/19 (C) K.Kosako

@@ -37,7 +37,7 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16

What's new
    -
  • Version 4.3.1 released. (2006/08/21) +
  • Version 4.4.4 released. (2006/09/19)
  • Version 2.5.7 released. (2006/07/28)
@@ -70,10 +70,10 @@ It follows the BSD license in the case of the one except for it.
Download:
    -
  • Latest release version 4.3.1 (2006/08/21) Change Log -
  • 4.3.0 (2006/08/17) -
  • 4.2.2 (2006/08/03) -
  • 4.2.1 (2006/07/31) +
  • Latest release version 4.4.4 (2006/09/19) Change Log +
  • 4.4.3 (2006/09/15) +
  • 4.4.2 (2006/09/08) +
  • 4.4.1 (2006/08/29)
  • Latest release version 2.5.7 (2006/07/28) Change Log
  • 2.5.6 (2006/05/29)
  • 2.5.5 (2006/05/08) @@ -87,7 +87,7 @@ It follows the BSD license in the case of the one except for it.

    -
    Documents: (version 4.3.1) +
    Documents: (version 4.4.4)
    • Regular Expressions (Japanese: EUC-JP) @@ -109,6 +109,7 @@ It follows the BSD license in the case of the one except for it.
    • Oniguruma in RAA (Ruby Application Archive)
    • FreeBSD ports
    • SoftAntenna > Lib > Oniguruma (Japanese page) +
    • bregonig.dll (Japanese page)
    • cnRegex 4D Plugin (Mac OS X) (Japanese page)
    • new script language crowbar (Japanese page)
    • Delphi interface (Win32) (Japanese page) diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h index 27c959d94b..a0107cbe35 100644 --- a/ext/mbstring/oniguruma/oniguruma.h +++ b/ext/mbstring/oniguruma/oniguruma.h @@ -37,8 +37,8 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 4 -#define ONIGURUMA_VERSION_MINOR 3 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_MINOR 4 +#define ONIGURUMA_VERSION_TEENY 4 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c index 7bc891cf0b..9b862657d8 100644 --- a/ext/mbstring/oniguruma/regcomp.c +++ b/ext/mbstring/oniguruma/regcomp.c @@ -187,7 +187,6 @@ add_opcode(regex_t* reg, int opcode) } #ifdef USE_COMBINATION_EXPLOSION_CHECK - static int add_state_check_num(regex_t* reg, int num) { @@ -196,7 +195,7 @@ add_state_check_num(regex_t* reg, int num) BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); return 0; } -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ +#endif static int add_rel_addr(regex_t* reg, int addr) @@ -729,7 +728,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) /* anychar repeat */ if (NTYPE(qn->target) == N_ANYCHAR) { if (qn->greedy && infinite) { - if (IS_NOT_NULL(qn->next_head_exact)) + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; else return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; @@ -807,15 +806,11 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) if (is_anychar_star_qualifier(qn)) { r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; - if (IS_NOT_NULL(qn->next_head_exact)) { + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT - : OP_ANYCHAR_ML_STAR_PEEK_NEXT)); + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else - r = add_opcode(reg, (CKN_ON ? 
- OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT - : OP_ANYCHAR_STAR_PEEK_NEXT)); + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); if (r) return r; if (CKN_ON) { r = add_state_check_num(reg, ckn); @@ -1530,7 +1525,7 @@ compile_length_tree(Node* node, regex_t* reg) else #endif if (br->back_num == 1) { - r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3) + r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); } else { @@ -1668,7 +1663,6 @@ compile_tree(Node* node, regex_t* reg) switch (n) { case 1: r = add_opcode(reg, OP_BACKREF1); break; case 2: r = add_opcode(reg, OP_BACKREF2); break; - case 3: r = add_opcode(reg, OP_BACKREF3); break; default: r = add_opcode(reg, OP_BACKREFN); if (r) return r; @@ -3659,7 +3653,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) #define ALLOWED_ANCHOR_IN_LB \ ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) #define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) case ANCHOR_LOOK_BEHIND: { @@ -5490,7 +5484,6 @@ OnigOpInfoType OnigOpInfo[] = { { OP_BEGIN_POSITION, "begin-position", ARG_NON }, { OP_BACKREF1, "backref1", ARG_NON }, { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREF3, "backref3", ARG_NON }, { OP_BACKREFN, "backrefn", ARG_MEMNUM }, { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, @@ -5537,10 +5530,6 @@ OnigOpInfoType OnigOpInfo[] = { { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, { OP_STATE_CHECK_ANYCHAR_ML_STAR, "state-check-anychar-ml*", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT, - "state-check-anychar*-peek-next", ARG_SPECIAL }, - { OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT, - 
"state-check-anychar-ml*-peek-next", ARG_SPECIAL }, { -1, "", ARG_NON } }; @@ -5826,15 +5815,6 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, fprintf(f, ":%d:(%d)", scn, addr); break; - case OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT: - case OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - fprintf(f, ":%d", scn); - p_string(f, 1, bp); - bp += 1; - break; - default: fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c index 93662fea7f..1a9567a3d0 100644 --- a/ext/mbstring/oniguruma/regexec.c +++ b/ext/mbstring/oniguruma/regexec.c @@ -2071,53 +2071,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } STAT_OP_OUT; break; - - case OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT: - STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT); - - GET_STATE_CHECK_NUM_INC(mem, p); - while (s < end) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - if (*p == *s) { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + 1, s, sprev, mem); - } - n = enc_len(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - p++; - STAT_OP_OUT; - break; - - case OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT: - STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT); - - GET_STATE_CHECK_NUM_INC(mem, p); - while (s < end) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - if (*p == *s) { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + 1, s, sprev, mem); - } - n = enc_len(encode, s); - if (n >1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - p++; - STAT_OP_OUT; - break; #endif /* USE_COMBINATION_EXPLOSION_CHECK */ case OP_WORD: STAT_OP_IN(OP_WORD); @@ -2350,11 +2303,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, goto backref; break; - case OP_BACKREF3: STAT_OP_IN(OP_BACKREF3); - mem = 
3; - goto backref; - break; - case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); GET_MEMNUM_INC(mem, p); backref: @@ -3118,19 +3066,19 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, (int )text, (int )text_end, (int )text_range); #endif - tlen1 = (target_end - target) - 1; - end = text_range + tlen1; - if (end > text_end) - end = text_end; - tail = target_end - 1; + tlen1 = tail - target; + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; + s = text; if (IS_NULL(reg->int_map)) { while (s < end) { p = se = s + tlen1; t = tail; - while (*p == *t && t >= target) { + while (t >= target && *p == *t) { p--; t--; } if (t < target) return (UChar* )s; @@ -3146,7 +3094,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = se = s + tlen1; t = tail; - while (*p == *t && t >= target) { + while (t >= target && *p == *t) { p--; t--; } if (t < target) return (UChar* )s; diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h index aa293be459..c06bf57630 100644 --- a/ext/mbstring/oniguruma/regint.h +++ b/ext/mbstring/oniguruma/regint.h @@ -59,9 +59,6 @@ /* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */ #define USE_NAMED_GROUP #define USE_SUBEXP_CALL -#ifdef HAVE_MBREGEX_BACKTRACK -#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */ -#endif /* HAVE_MBREGEX_BACKTRACK */ #define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR @@ -89,6 +86,7 @@ #define USE_VARIABLE_META_CHARS #define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ #define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ +/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ /* #define USE_MULTI_THREAD_SYSTEM */ #define THREAD_ATOMIC_START /* depend on thread system */ #define THREAD_ATOMIC_END /* depend on thread 
system */ @@ -103,7 +101,9 @@ #include "version.h" #include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */ +#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */ #define USE_MULTI_THREAD_SYSTEM + #define THREAD_ATOMIC_START DEFER_INTS #define THREAD_ATOMIC_END ENABLE_INTS #define THREAD_PASS rb_thread_schedule() @@ -600,7 +600,6 @@ enum OpCode { OP_BACKREF1, OP_BACKREF2, - OP_BACKREF3, OP_BACKREFN, OP_BACKREFN_IC, OP_BACKREF_MULTI, @@ -651,9 +650,7 @@ enum OpCode { OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ OP_STATE_CHECK, /* check only */ OP_STATE_CHECK_ANYCHAR_STAR, - OP_STATE_CHECK_ANYCHAR_ML_STAR, - OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT, - OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT + OP_STATE_CHECK_ANYCHAR_ML_STAR }; typedef int RelAddrType; @@ -747,7 +744,6 @@ typedef void* PointerType; #define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) #define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) #define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1 + SIZE_STATE_CHECK_NUM) #endif #define MC_ESC(enc) (enc)->meta_char_table.esc -- 2.40.0