if test "$PHP_MBREGEX_BACKTRACK" != "no"; then
- AC_DEFINE([HAVE_MBREGEX_BACKTRACK],1,[whether to check multibyte regex backtrack])
+ AC_DEFINE([USE_COMBINATION_EXPLOSION_CHECK],1,[whether to check multibyte regex backtrack])
fi
PHP_MBSTRING_ADD_CFLAG([-DNOT_RUBY])
ARG_ENABLE("mbstring", "multibyte string functions", "no");
ARG_ENABLE("mbregex", "multibyte regex support", "no");
+ARG_ENABLE("mbregex-backtrack", "check multibyte regex backtrack", "yes");
if (PHP_MBSTRING == "yes") {
if (PHP_MBREGEX != "no") {
AC_DEFINE('HAVE_STDARG_PROTOTYPES', 1, 'have stdarg.h');
AC_DEFINE('HAVE_MBREGEX', 1);
+ if (PHP_MBREGEX_BACKTRACK != "no") {
+ AC_DEFINE('USE_COMBINATION_EXPLOSION_CHECK', 1);
+ }
ADD_SOURCES("ext/mbstring/oniguruma", "regcomp.c regerror.c \
regenc.c regexec.c reggnu.c regparse.c regposerr.c \
regext.c regsyntax.c regtrav.c regversion.c st.c", "mbstring");
sprintf(buf, "%d.%d.%d",
ONIGURUMA_VERSION_MAJOR,ONIGURUMA_VERSION_MINOR,ONIGURUMA_VERSION_TEENY);
php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
-#ifdef HAVE_MBREGEX_BACKTRACK
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
-#else /* HAVE_MBREGEX_BACKTRACK */
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
-#endif /* HAVE_MBREGEX_BACKTRACK */
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
}
#endif
php_info_print_table_end();
History
+2006/09/19: Version 4.4.4
+
+2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/19: [impl] (thanks KOYAMA Tetsuji)
+ HAVE_STDARG_PROTOTYPES was not defined on Mac OS X
+ because of an Xcode 2.4 (gcc 4.0.1) problem. [php-dev 1312] etc...
+
+2006/09/15: Version 4.4.3
+
+2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/15: [bug] (thanks Allan Odgaard)
+ out of range access in bm_search_notrev().
+ (p < s)
+
+2006/09/08: Version 4.4.2
+
+2006/09/08: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/08: [bug] (thanks K.Takata)
+ out of range access in bm_search_notrev().
+2006/09/04: [spec] (thanks K.Takata)
+ allow look-behind in negative look-behind.
+ ex. /(?<!(?<=a)b|c)d/
+
+2006/08/29: Version 4.4.1
+
+2006/08/29: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/08/29: [dist] (thanks Seiji Masugata)
+ add configure option --enable-combination-explosion-check
+
+2006/08/25: Version 4.4.0
+
+2006/08/25: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/08/25: [impl] add_state_check_num() should be enclosed in
+ ifdef USE_COMBINATION_EXPLOSION_CHECK.
+2006/08/23: [spec] config USE_COMBINATION_EXPLOSION_CHECK is enabled
+ in Ruby mode only.
+2006/08/22: [impl] remove the trailing comma after the last entry of enum OpCode.
+2006/08/22: [impl] remove OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT and
+ OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT.
+2006/08/22: [impl] remove OP_BACKREF3.
+
2006/08/21: Version 4.3.1
2006/08/21: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
+/* Define if combination explosion check */
+#undef USE_COMBINATION_EXPLOSION_CHECK
+
/* Version number of package */
#undef VERSION
<h1>Oniguruma</h1>
<p>
-2006/08/21 (C) K.Kosako
+2006/09/19 (C) K.Kosako
</p>
<p>
<dt><b>What's new</b>
</font>
<ul>
-<li>Version 4.3.1 released. (2006/08/21)
+<li>Version 4.4.4 released. (2006/09/19)
<li>Version 2.5.7 released. (2006/07/28)
</ul>
</dl>
<dt><b>Download:</b>
<ul>
-<li> <a href="archive/onig-4.3.1.tar.gz">Latest release version 4.3.1</a> (2006/08/21) <a href="HISTORY_4X.txt">Change Log</a>
-<li> <a href="archive/onig-4.3.0.tar.gz">4.3.0</a> (2006/08/17)
-<li> <a href="archive/onig-4.2.2.tar.gz">4.2.2</a> (2006/08/03)
-<li> <a href="archive/onig-4.2.1.tar.gz">4.2.1</a> (2006/07/31)
+<li> <a href="archive/onig-4.4.4.tar.gz">Latest release version 4.4.4</a> (2006/09/19) <a href="HISTORY_4X.txt">Change Log</a>
+<li> <a href="archive/onig-4.4.3.tar.gz">4.4.3</a> (2006/09/15)
+<li> <a href="archive/onig-4.4.2.tar.gz">4.4.2</a> (2006/09/08)
+<li> <a href="archive/onig-4.4.1.tar.gz">4.4.1</a> (2006/08/29)
<li> <a href="archive/onigd2_5_7.tar.gz">Latest release version 2.5.7</a> (2006/07/28) <a href="HISTORY_2X.txt">Change Log</a>
<li> <a href="archive/onigd2_5_6.tar.gz">2.5.6</a> (2006/05/29)
<li> <a href="archive/onigd2_5_5.tar.gz">2.5.5</a> (2006/05/08)
<br>
<br>
-<dt><b>Documents:</b> (version 4.3.1)
+<dt><b>Documents:</b> (version 4.4.4)
<ul>
<li> <a href="doc/RE.txt">Regular Expressions</a>
<a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
<li> <a href="http://raa.ruby-lang.org/project/oniguruma/">Oniguruma in RAA</a> (Ruby Application Archive)
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna > Lib > Oniguruma</a> (Japanese page)
+<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll</a> (Japanese page)
<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page)
<li> <a href="http://kmaebashi.com/">new script language crowbar</a> (Japanese page)
<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page)
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 4
-#define ONIGURUMA_VERSION_MINOR 3
-#define ONIGURUMA_VERSION_TEENY 1
+#define ONIGURUMA_VERSION_MINOR 4
+#define ONIGURUMA_VERSION_TEENY 4
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
}
#ifdef USE_COMBINATION_EXPLOSION_CHECK
-
static int
add_state_check_num(regex_t* reg, int num)
{
BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
return 0;
}
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+#endif
static int
add_rel_addr(regex_t* reg, int addr)
/* anychar repeat */
if (NTYPE(qn->target) == N_ANYCHAR) {
if (qn->greedy && infinite) {
- if (IS_NOT_NULL(qn->next_head_exact))
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
else
return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
if (is_anychar_star_qualifier(qn)) {
r = compile_tree_n_times(qn->target, qn->lower, reg);
if (r) return r;
- if (IS_NOT_NULL(qn->next_head_exact)) {
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT
- : OP_ANYCHAR_ML_STAR_PEEK_NEXT));
+ r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
else
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT
- : OP_ANYCHAR_STAR_PEEK_NEXT));
+ r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
if (r) return r;
if (CKN_ON) {
r = add_state_check_num(reg, ckn);
else
#endif
if (br->back_num == 1) {
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3)
+ r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
}
else {
switch (n) {
case 1: r = add_opcode(reg, OP_BACKREF1); break;
case 2: r = add_opcode(reg, OP_BACKREF2); break;
- case 3: r = add_opcode(reg, OP_BACKREF3); break;
default:
r = add_opcode(reg, OP_BACKREFN);
if (r) return r;
#define ALLOWED_ANCHOR_IN_LB \
( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
#define ALLOWED_ANCHOR_IN_LB_NOT \
-( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
+( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
case ANCHOR_LOOK_BEHIND:
{
{ OP_BEGIN_POSITION, "begin-position", ARG_NON },
{ OP_BACKREF1, "backref1", ARG_NON },
{ OP_BACKREF2, "backref2", ARG_NON },
- { OP_BACKREF3, "backref3", ARG_NON },
{ OP_BACKREFN, "backrefn", ARG_MEMNUM },
{ OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
{ OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
{ OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
{ OP_STATE_CHECK_ANYCHAR_ML_STAR,
"state-check-anychar-ml*", ARG_STATE_CHECK },
- { OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT,
- "state-check-anychar*-peek-next", ARG_SPECIAL },
- { OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT,
- "state-check-anychar-ml*-peek-next", ARG_SPECIAL },
{ -1, "", ARG_NON }
};
fprintf(f, ":%d:(%d)", scn, addr);
break;
- case OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT:
- case OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT:
- scn = *((StateCheckNumType* )bp);
- bp += SIZE_STATE_CHECK_NUM;
- fprintf(f, ":%d", scn);
- p_string(f, 1, bp);
- bp += 1;
- break;
-
default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
*--bp);
}
STAT_OP_OUT;
break;
-
- case OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT:
- STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT);
-
- GET_STATE_CHECK_NUM_INC(mem, p);
- while (s < end) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- if (*p == *s) {
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + 1, s, sprev, mem);
- }
- n = enc_len(encode, s);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
- s += n;
- }
- p++;
- STAT_OP_OUT;
- break;
-
- case OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT:
- STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT);
-
- GET_STATE_CHECK_NUM_INC(mem, p);
- while (s < end) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- if (*p == *s) {
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + 1, s, sprev, mem);
- }
- n = enc_len(encode, s);
- if (n >1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
- }
- p++;
- STAT_OP_OUT;
- break;
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
case OP_WORD: STAT_OP_IN(OP_WORD);
goto backref;
break;
- case OP_BACKREF3: STAT_OP_IN(OP_BACKREF3);
- mem = 3;
- goto backref;
- break;
-
case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN);
GET_MEMNUM_INC(mem, p);
backref:
(int )text, (int )text_end, (int )text_range);
#endif
- tlen1 = (target_end - target) - 1;
- end = text_range + tlen1;
- if (end > text_end)
- end = text_end;
-
tail = target_end - 1;
+ tlen1 = tail - target;
+ end = text_range;
+ if (end + tlen1 > text_end)
+ end = text_end - tlen1;
+
s = text;
if (IS_NULL(reg->int_map)) {
while (s < end) {
p = se = s + tlen1;
t = tail;
- while (*p == *t && t >= target) {
+ while (t >= target && *p == *t) {
p--; t--;
}
if (t < target) return (UChar* )s;
while (s < end) {
p = se = s + tlen1;
t = tail;
- while (*p == *t && t >= target) {
+ while (t >= target && *p == *t) {
p--; t--;
}
if (t < target) return (UChar* )s;
/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
-#ifdef HAVE_MBREGEX_BACKTRACK
-#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */
-#endif /* HAVE_MBREGEX_BACKTRACK */
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
#define USE_VARIABLE_META_CHARS
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
+/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
/* #define USE_MULTI_THREAD_SYSTEM */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
+#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */
#define USE_MULTI_THREAD_SYSTEM
+
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
OP_BACKREF1,
OP_BACKREF2,
- OP_BACKREF3,
OP_BACKREFN,
OP_BACKREFN_IC,
OP_BACKREF_MULTI,
OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
OP_STATE_CHECK, /* check only */
OP_STATE_CHECK_ANYCHAR_STAR,
- OP_STATE_CHECK_ANYCHAR_ML_STAR,
- OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT,
- OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT
+ OP_STATE_CHECK_ANYCHAR_ML_STAR
};
typedef int RelAddrType;
#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
-#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1 + SIZE_STATE_CHECK_NUM)
#endif
#define MC_ESC(enc) (enc)->meta_char_table.esc