From: Moriyoshi Koizumi Date: Sat, 28 Feb 2004 20:45:27 +0000 (+0000) Subject: - Upgrade bundled oniguruma regex library to 2.2.2 (Rui, Moriyoshi) X-Git-Tag: RELEASE_0_2_0~128 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=11b0f469f00bf2b80d453dbff1aa274db5dfb500;p=php - Upgrade bundled oniguruma regex library to 2.2.2 (Rui, Moriyoshi) . Supports various encodings such as BIG5, GB2312 and ISO-8859-* . Fixes bug #26677 (mbstring compile errors with IRIX) . Many thanks to K.Kosako. - Remove redundant files that are not relevant to the build. --- diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index 9cff51897f..ba76165bfc 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -80,7 +80,32 @@ int main() { return foo(10, "", 3.14); } oniguruma/regexec.c oniguruma/reggnu.c oniguruma/regparse.c + oniguruma/regenc.c oniguruma/regposerr.c + oniguruma/enc/ascii.c + oniguruma/enc/utf8.c + oniguruma/enc/euc_jp.c + oniguruma/enc/euc_tw.c + oniguruma/enc/euc_kr.c + oniguruma/enc/sjis.c + oniguruma/enc/iso8859_1.c + oniguruma/enc/iso8859_2.c + oniguruma/enc/iso8859_3.c + oniguruma/enc/iso8859_4.c + oniguruma/enc/iso8859_5.c + oniguruma/enc/iso8859_6.c + oniguruma/enc/iso8859_7.c + oniguruma/enc/iso8859_8.c + oniguruma/enc/iso8859_9.c + oniguruma/enc/iso8859_10.c + oniguruma/enc/iso8859_11.c + oniguruma/enc/iso8859_13.c + oniguruma/enc/iso8859_14.c + oniguruma/enc/iso8859_15.c + oniguruma/enc/iso8859_16.c + oniguruma/enc/koi8.c + oniguruma/enc/koi8_r.c + oniguruma/enc/big5.c ]) fi ]) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 2a1944db83..d0fb5db869 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -610,10 +610,10 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) MBSTRG(current_internal_encoding) = no_encoding; #if HAVE_MBREGEX { - php_mb_reg_char_encoding mbctype; + OnigEncoding mbctype; mbctype = php_mb_regex_name2mbctype(new_value); - if (mbctype == REGCODE_UNDEF) { - mbctype = 
REGCODE_EUCJP; + if (mbctype == ONIG_ENCODING_UNDEF) { + mbctype = ONIG_ENCODING_EUC_JP; } MBSTRG(current_mbctype) = MBSTRG(default_mbctype) = mbctype; } @@ -995,7 +995,7 @@ PHP_MINFO_FUNCTION(mbstring) php_info_print_table_end(); php_info_print_table_start(); - php_info_print_table_colspan_header(2, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1."); + php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1."); php_info_print_table_end(); DISPLAY_INI_ENTRIES(); diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY index d9627fced7..0373fc894f 100644 --- a/ext/mbstring/oniguruma/HISTORY +++ b/ext/mbstring/oniguruma/HISTORY @@ -1,11 +1,361 @@ History +2004/02/27: Version 2.2.2 + +2004/02/27: [impl] fix the position of onig_stat_print(). +2004/02/27: [impl] define ONIG_RUBY_DEFINE_GLOBAL_FUNCTION() in regint.h + for ignored by RDoc. + +2004/02/26: Version 2.2.1 + +2004/02/26: [bug] invalid definition at onig_error_code_to_str() + in the case of NOT HAVE_STDARG_PROTOTYPES. + +2004/02/25: Version 2.2.0 + +2004/02/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/02/24: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux]. +2004/02/24: [bug] undefined IS_BLANK() and IS_GRAPH() was used in + onigenc_is_code_ctype() in the case of Ruby M17N. +2004/02/24: [new] support ISO-8859-16. (ONIG_ENCODING_ISO_8859_16) +2004/02/24: [bug] should not fold match for 0xdf in iso8859_6.c. +2004/02/24: [new] support ISO-8859-14. (ONIG_ENCODING_ISO_8859_14) +2004/02/23: [new] support ISO-8859-13. (ONIG_ENCODING_ISO_8859_13) +2004/02/23: [new] support ISO-8859-10. (ONIG_ENCODING_ISO_8859_10) +2004/02/20: [bug] fix iso_8859_4_mbc_is_case_ambig(). +2004/02/20: [new] support ISO-8859-9. 
(ONIG_ENCODING_ISO_8859_9) +2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4, + ISO-8859-6, ISO-8859-7, ISO-8859-8, KOI8_R. +2004/02/18: [bug] wrong replaced name OnigSyntaxGnuOnigex. +2004/02/17: [spec] check capture status for empty infinite loop. + [ruby-dev:20224] etc... + ex. /(?:\1a|())*/.match("a"), + /(?:()|()|()|(x)|()|())*\2b\5/.match("b") + add USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK. + add OP_NULL_CHECK_END_MEMST, OP_NULL_CHECK_END_MEMST_PUSH. + add stack type STK_NULL_CHECK_END. +2004/02/13: [impl] add OnigEncodingEUC_CN to enc/euc_kr.c. +2004/02/13: [bug] (thanks Simon Strandgaard) + parsing of nested repeat was invalid. + ex. /ab{2,3}*/ was /(?:a(?:b{2,3}))*/, + should be /a(?:b{2,3}*)/ +2004/02/12: [bug] (thanks Simon Strandgaard) + OP_REPEAT_INC_NG process in match_at() is wrong. + ex. bad match /a.{0,2}?a/ =~ "0aXXXa0" +2004/02/12: [bug] (thanks Simon Strandgaard) + wrong fetch after (?x) option. ex. "(?x)\ta .\n+b" +2004/02/12: [bug] (thanks Simon Strandgaard) + [\^] is not a empty char class. +2004/02/09: [new] add onig_set_syntax_op(), onig_set_syntax_op2(), + onig_set_syntax_behavior(), onig_set_syntax_options(). +2004/02/06: [dist] add a new target 'site' to Makefile.in. +2004/02/06: [dist] add index.html. +2004/02/03: [bug] oniggnu.h was not installed by 'make install'. + +2004/02/02: Version 2.1.0 + +2004/02/02: [test] success in ruby 1.9.0 (2004-02-02) [i686-linux]. +2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/02/02: [new] support ISO-8859-11. (ONIG_ENCODING_ISO_8859_11) +2004/02/02: [new] support ISO-8859-5. (ONIG_ENCODING_ISO_8859_5) +2004/02/02: [impl] should check single byte encoding or not in and_cclass() + and or_cclass(). +2004/01/30: [dist] add oniggnu.h. +2004/01/30: [bug] ISO-8859-7 0xb7 (middle dot) is Punct type. +2004/01/30: [new] support ISO-8859-8. 
(ONIG_ENCODING_ISO_8859_8) +2004/01/29: [new] support ISO-8859-7. (ONIG_ENCODING_ISO_8859_7) +2004/01/29: [new] support ISO-8859-6. (ONIG_ENCODING_ISO_8859_6) +2004/01/28: [new] support KOI8-R. (ONIG_ENCODING_KOI8_R) +2004/01/28: [new] support KOI8. (ONIG_ENCODING_KOI8) +2004/01/27: [dist] rename enc/isotable.c to enc/mktable.c. +2004/01/27: [new] support ISO-8859-4. (ONIG_ENCODING_ISO_8859_4) +2004/01/26: [new] support ISO-8859-3. (ONIG_ENCODING_ISO_8859_3) +2004/01/26: [bug] EncISO_8859_{1,15}_CtypeTable[256] was wrong. + (0x80 - 0xff is not ASCII) +2004/01/23: [new] support ISO-8859-2. (ONIG_ENCODING_ISO_8859_2) +2004/01/23: [dist] add enc/isotable.c. +2004/01/22: [new] support EUC-TW. (ONIG_ENCODING_EUC_TW) +2004/01/22: [bug] definition of GET_ALIGNMENT_PAD_SIZE() and + ALIGNMENT_RIGHT() was wrong. + type casting should be unsigned int, not int. +2004/01/22: [impl] add defined(__x86_64) || defined(__x86_64__) + to unaligned word access condition. (AMD64 ?) +2004/01/21: [dist] rename enc/eucjp.c to enc/euc_jp.c. +2004/01/21: [new] support EUC-KR. (ONIG_ENCODING_EUC_KR) +2004/01/20: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/01/20: [dist] change Makefile.in. +2004/01/20: [spec] add \p{...}, \P{...} in char class. +2004/01/20: [new] character property operators \p{...}, \P{...}. + supported in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL. +2004/01/19: [spec] allow /a{,n}/ as /a{0,n}/. (but don't allow /a{,}/) +2004/01/19: [dist] rename onigcomp200.h to onigcmpt200.h. +2004/01/19: [dist] update re.c.168.patch. svn add re.c.181.patch. +2004/01/16: [dist] update sample/*.c for new API. +2004/01/16: [dist] add onigcomp200.h. (for old API compatibility) +2004/01/16: [dist] update documents API, RE and RE.ja. +2004/01/16: [spec] change prefix REG_ -> ONIG_, regex_ -> onig_, + ENC_ -> ONIGENC, enc_ -> onigenc_. +2004/01/15: [impl] rename ENC_IS_MBC_E_WORD() to ENC_IS_MBC_WORD(). + rename ENC_CTYPE_SUPPORT_LEVEL_SB_ONLY to + ENC_CTYPE_SUPPORT_LEVEL_SB.
+2004/01/14: [impl] rename UNALIGNED_WORD_ACCESS to + PLATFORM_UNALIGNED_WORD_ACCESS. +2004/01/14: [impl] change MATCH_STACK_LIMIT_SIZE value from 200000 to 500000. +2004/01/13: [impl] remove ENC_CODE_TO_MBC_FIRST(enc,code) in regenc.h. + remove code_to_mbc_first member in RegCharEncodingType. +2004/01/13: [impl] remove head byte bitset information in cclass->mbuf. +2003/12/26: [impl] change macro name ismb_xxxx() in enc/*.c for + escape conflict. + +2003/12/24: Version 2.0.0 + +2003/12/24: [spec] ignore case option is effective to numbered char. + ex. /\x61/i =~ "A" +2003/12/24: [test] success in ruby 1.8.1 (2003-12-24) [i686-linux]. +2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2003/12/24: [test] success in regex.c compile test on ruby-m17n. + (but can't make miniruby because re.c patch fail.) +2003/12/24: [bug] (thanks H.Miyamoto) /[\W]/ was wrong in 1.9.5. +2003/12/22: [spec] implement fold match on UTF-8 encoding. +2003/12/19: [impl] add ctype_support_level and ctype_add_codes() member to + RegCharEncoding type. +2003/12/19: [impl] add add_ctype_to_cc() in regparse.c. +2003/12/19: [impl] add enc_is_code_ctype() in REG_RUBY_M17N case. +2003/12/19: [impl] change ENC_CODE_TO_MBC() interface. +2003/12/18: [new] implement fold match. (variable number of char + match in ignore case mode.) + ex. German alphabet ess-tsett(U+00DF) match "SS" and "ss". +2003/12/17: [impl] refactoring of encoding system. +2003/12/17: [impl] add enc_init() in regenc.c. +2003/12/17: [new] support Big5. (REG_ENCODING_BIG5) +2003/12/16: [impl] change CodePoint from unsigned int to unsigned long. +2003/12/16: [new] support ISO 8859-15. (REG_ENCODING_ISO_8859_15) +2003/12/16: [impl] change P_() macro definition condition for Win32. +2003/12/16: [dist] add sample/encode.c +2003/12/16: [new] support ISO 8859-1. (REG_ENCODING_ISO_8859_1) +2003/12/15: [impl] rename IS_ENC_XXXX to ENC_IS_XXXX. 
+2003/12/15: [impl] rename RegDefaultCharEncoding to EncDefaultCharEncoding. +2003/12/15: [impl] divide encoding files. (enc/ascii.c, enc/utf8.c etc...) +2003/12/15: [bug] unexpected infinite loop in regex_snprintf_with_pattern(). + change local var. type char* to UChar*. +2003/12/15: [impl] remove REG_MBLEN_TABLE[]. +2003/12/15: [spec] rename function prefix regex_get_prev_char_head(), + regex_get_left_adjust_char_head() and + regex_get_right_adjust_char_head() to enc_xxxxxx(). +2003/12/15: [impl] rename function prefixes in regenc.h from regex_ to enc_. +2003/12/12: [impl] remove USE_SBMB_CLASS. +2003/12/12: [impl] rename mb -> mbc, mblen() to enc_len(). +2003/12/12: [impl] rename WCINT to CodePoint. +2003/12/11: [impl] delete IS_XXXX() ctype macros from regint.h. +2003/12/11: [impl] add enc->wc_is_ctype() and RegAsciiCtypeTable[256]. +2003/12/11: [impl] remove RegAsciiCaseAmbigTable. +2003/12/10: [impl] use ENC_TO_LOWER() for ignore case comparison. +2003/12/08: [impl] *** re-defined RegCharEncoding in oniguruma.h. *** +2003/12/08: [impl] add USE_POSIX_REGION_OPTION to regint.h. +2003/12/08: [impl] add IS_ENC_WORD() to regenc.h. +2003/12/05: [impl] rename IS_CODE_XXXX() to IS_ENC_XXXX(). +2003/12/05: [impl] delete IS_CODE_WORD() from regenc.h. +2003/12/04: [spec] rename REG_SYN_OP_BACK_REF to REG_SYN_OP_DECIMAL_BACKREF. +2003/12/04: [spec] add (REG_SYN_OP_ESC_W_WORD | REG_SYN_OP_ESC_B_WORD_BOUND | + REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | REG_SYN_OP_BACK_REF) + to RegSyntaxGrep. +2003/12/04: [spec] remove REG_ENCODING_DEFAULT and REGCODE_DEFAULT. +2003/12/04: [spec] move declarations of regex_get_default_encoding() and + regex_set_default_encoding() from oniguruma.h to regenc.h. +2003/12/03: [new] add regex_get_default_encoding() and + regex_set_default_encoding(). +2003/12/03: [spec] REG_ENCODING_DEFAULT meaning is changed. + (current default value, not initial default value.) +2003/12/03: [spec] REGCODE_XXX is obsoleted. use REG_ENCODING_XXX. 
+2003/12/02: [memo] alias svnst='svn status | grep -v "^\?"' +2003/12/02: [spec] move regex_set_default_trans_table() declaration + from oniguruma.h to regenc.h. (obsoleted API) +2003/12/02: [impl] move variables RegDefaultCharEncoding, DefaultTransTable and + AmbiguityTable to regenc.c. +2003/12/01: [impl] add regex_continuous_sbmb() to regenc.c. +2003/12/01: [dist] add regenc.h and regenc.c. +2003/11/18: [dist] change testconv.rb. +2003/11/18: [bug] (thanks Masaru Tsuda) + memory leak in parse_subexp(). +2003/11/18: [bug] (thanks Masaru Tsuda) + memory leak in names_clear() and parse_char_class(). +2003/11/17: [bug] memory leak in parse_char_class(). +2003/11/17: [bug] (thanks Masaru Tsuda) + OptExactInfo length should not over OPT_EXACT_MAXLEN. + (concat_opt_exact_info_str()) + +2003/11/12: Version 1.9.5 + +2003/11/12: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2003/11/12: [test] success in ruby 1.8.1 (2003-11-11) [i686-linux]. +2003/11/12: [spec] add definition of REG_INEFFECTIVE_META_CHAR. +2003/11/11: [dist] add a sample program sample/sql.c. +2003/11/11: [new] add variable meta character. + regex_set_meta_char() +2003/11/11: [spec] add syntax op. REG_SYN_OP_VARIABLE_META_CHARS. +2003/11/11: [spec] rename REG_SYN_OP_ESC_CAPITAL_Q_QUOTE to + REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE, + REG_SYN_OP_QMARK_GROUP_EFFECT to + REG_SYN_OP2_QMARK_GROUP_EFFECT. +2003/11/06: [impl] define THREAD_PASS as rb_thread_schedule() in Ruby mode. +2003/11/05: [spec] add syntax behavior REG_SYN_WARN_REDUNDANT_NESTED_REPEAT. +2003/11/05: [spec] rename REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED to + REG_SYN_WARN_CC_OP_NOT_ESCAPED. +2003/11/04: [new] add regex_set_warn_func() and regex_set_verb_warn_func(). +2003/10/30: [new] add regex_name_to_backref_number(). + (for multiplex definition name, see sample/names.c) +2003/10/30: [spec] add name_end and reg argument to callback function of + regex_foreach_name(). 
(see sample/names.c) +2003/10/29: [spec] add syntax behavior REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME. + add error code REGERR_MULTIPLEX_DEFINED_NAME. +2003/10/14: [dist] modify sample/simple.c. +2003/10/03: [bug] (thanks nobu) [ruby-dev:21472] + sub-anchor of optimization map info was wrong + in concat_left_node_opt_info(). + ex. /^(x?y)/ = "xy" fail. + +2003/09/17: Version 1.9.4 + +2003/09/17: [spec] change specification of char-class range in ignore case mode + follows with Ruby 1.8(2003-09-17). + ex. /[H-c]/i ==> (H-Z, 0x5b-0x60, a-c)/i + ==> H-Z, h-z, 0x5b-0x60, a-c, A-C +2003/09/16: [bug] (thanks Guy Decoux) + remove env->option == option check in parse_effect(). + change env->option for dynamic option in parse_exp(). + (ex. bad match /(?i)(?-i)a/ =~ "A") +2003/09/12: [spec] rename REG_SYN_ALLOW_RANGE_OP_IN_CC to + REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC, + REG_SYN_ESCAPE_IN_CC to REG_SYN_BACKSLASH_ESCAPE_IN_CC. +2003/09/11: [bug] change to IS_SYNTAX_OP2 at REG_SYN_OP2_ESC_GNU_BUF_ANCHOR. +2003/09/09: [spec] rename REG_SYN_OP2_ESC_M_BAR_META to + REG_SYN_OP2_ESC_CAPITAL_M_BAR_META, + REG_SYN_OP_ESC_Q_QUOTE to REG_SYN_OP_ESC_CAPITAL_Q_QUOTE, + REG_SYN_OP_ESC_SUBEXP to REG_SYN_OP_ESC_LPAREN_SUBEXP, + REG_SYN_OP_ESC_BUF_ANCHOR to REG_SYN_OP_ESC_AZ_BUF_ANCHOR, + REG_SYN_OP_ESC_GNU_BUF_ANCHOR to + REG_SYN_OP2_ESC_GNU_BUF_ANCHOR, + REG_SYN_OP_ESC_CONTROL_CHAR to REG_SYN_OP_ESC_CONTROL_CHARS, + REG_SYN_OP_ESC_WORD to REG_SYN_OP_ESC_W_WORD, + REG_SYN_OP_ESC_WORD_BEGIN_END to + REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END, + REG_SYN_OP_ESC_WORD_BOUND to REG_SYN_OP_ESC_B_WORD_BOUND, + REG_SYN_OP_ESC_WHITE_SPACE to REG_SYN_OP_ESC_S_WHITE_SPACE, + REG_SYN_OP_ESC_DIGIT to REG_SYN_OP_ESC_D_DIGIT, + REG_SYN_OP_CC to REG_SYN_OP_BRACKET_CC, + REG_SYN_OP2_CCLASS_SET to REG_SYN_OP2_CCLASS_SET_OP, + REG_SYN_CONTEXT_INDEP_OPS to + REG_SYN_CONTEXT_INDEP_REPEAT_OPS, + REG_SYN_CONTEXT_INVALID_REPEAT_OPS to + REG_SYN_CONTEXT_INVALID_REPEAT_OPS. + add REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR. 
+2003/09/08: [spec] rename REG_SYN_OP_ANYCHAR to REG_SYN_OP_DOT_ANYCHAR, + REG_SYN_OP_0INF to REG_SYN_OP_ASTERISK_ZERO_INF, + REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_ASTERISK_ZERO_INF, + REG_SYN_OP_1INF to REG_SYN_OP_PLUS_ONE_INF, + REG_SYN_OP_ESC_1INF to REG_SYN_OP_ESC_PLUS_ONE_INF, + REG_SYN_OP_0INF to REG_SYN_OP_QMARK_ZERO_ONE, + REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_QMARK_ZERO_ONE, + REG_SYN_OP_INTERVAL to REG_SYN_OP_BRACE_INTERVAL, + REG_SYN_OP_ESC_INTERVAL to REG_SYN_OP_ESC_BRACE_INTERVAL, + REG_SYN_OP_SUBEXP to REG_SYN_OP_LPAREN_SUBEXP, + REG_SYN_OP_ALT to REG_SYN_OP_VBAR_ALT, + REG_SYN_OP_ESC_ALT to REG_SYN_OP_ESC_VBAR_ALT, + REG_SYN_OP_NON_GREEDY to REG_SYN_OP_QMARK_NON_GREEDY, + REG_SYN_OP_SUBEXP_EFFECT to REG_SYN_OP_QMARK_GROUP_EFFECT, + REG_SYN_OP2_POSSESSIVE_{REPEAT,INTERVAL} to + REG_SYN_OP2_PLUS_POSSESSIVE_{REPEAT,INTERVAL}, + REG_SYN_OP2_SUBEXP_CALL to REG_SYN_OP2_ESC_G_SUBEXP_CALL, + REG_SYN_OP2_NAMED_GROUP to REG_SYN_OP2_QMARK_LT_NAMED_GROUP + and REG_SYN_OP2_ESC_K_NAMED_BACKREF. +2003/09/02: [tune] call reduce_nested_qualifier() after disabling capture for + no-name group in noname_disable_map(). + ex. /(a+)*(?...)/ +2003/09/02: [impl] include is forgotten to erase in regcomp.c. +2003/09/01: [dist] update doc/RE and doc/RE.ja. +2003/08/26: [bug] (thanks Guy Decoux) + should not double free node at the case TK_CC_CC_OPEN + in parse_char_class(). + +2003/08/19: Version 1.9.3 + +2003/08/19: [inst] change re.c.180.patch. +2003/08/19: [impl] rename 'list of captures' to 'capture history'. +2003/08/19: [dist] add doc/RE.ja. (Japanese) +2003/08/19: [new] add regex_copy_syntax(). +2003/08/19: [spec] rename REG_SYN_OP2_ATMARK_LIST_OF_CAPTURES to + REG_SYN_OP2_ATMARK_CAPTURE_HISTORY. +2003/08/18: [spec] (thanks nobu) + don't use IMPORT in oniguruma.h and onigposix.h. +2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb. +2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in. 
+2003/08/18: [bug] REG_MBLEN_TABLE[SJIS][0xFD-0xFF] should be 1. +2003/08/18: [bug] (thanks nobu) mbctab_sjis[0x80] should be 0. +2003/08/18: [bug] (thanks nobu) + single/multi-byte decision was wrong in parse_char_class(). + add regex_wc2mblen(). + should not set fetched to 1 in TK_RAW_BYTE case. +2003/08/18: [bug] should update BitSet in the case inc_n >= 0 + in add_wc_range_to_buf(). +2003/08/13: [bug] change re.c.180.patch for fix rb_reg_to_s() in re.c. +2003/08/11: [bug] should clear region->list in regex_region_resize(). + +2003/08/08: Version 1.9.2 + +2003/08/08: [test] success in ruby 1.8.0 (2003-08-08) on Windows 2000 + VC++ 6.0 and Cygwin. +2003/08/08: [impl] don't define macro vsnprintf for WIN32 platform, + because definition is added in win32\win32.h. +2003/08/08: [test] success in ruby 1.8.0 and ruby 1.6.8(2003-08-03) on Linux. +2003/08/08: [dist] change re.c.180.patch and re.c.168.patch. +2003/08/08: [new] (thanks akr) + implemented list of captures. (?@...), (?@...) +2003/08/07: [dist] add sample/listcap.c. +2003/08/06: [bug] OP_MEMORY_END_PUSH_REC case in match_at(). + renewal of mem_start_stk[] should be after + STACK_PUSH_MEM_END() call. +2003/07/29: [new] add regex_get_encoding(), regex_get_options() and + regex_get_syntax(). +2003/07/25: [spec] (thanks akr) + change group(...) to shy-group(?:...) if named group is + used in the pattern. + add REG_SYN_CAPTURE_ONLY_NAMED_GROUP. +2003/07/24: [spec] rename REG_OPTION_CAPTURE_ONLY_NAMED_GROUP to + REG_OPTION_DONT_CAPTURE_GROUP. + add REG_OPTION_CAPTURE_GROUP. +2003/07/17: [spec] rename REG_SYN_OP2_NAMED_SUBEXP to REG_SYN_OP2_NAMED_GROUP. +2003/07/17: [spec] add REGERR_EMPTY_GROUP_NAME. +2003/07/17: [spec] rename REGERR_INVALID_SUBEXP_NAME + to REGERR_INVALID_CHAR_IN_GROUP_NAME. +2003/07/17: [spec] restrict usable chars of group name to alphabet, digit, + '_' or multibyte-char in fetch_name(). [ruby-dev:20706] +2003/07/16: [impl] minor change of sample/names.c. 
+2003/07/14: [impl] rename USE_NAMED_SUBEXP to USE_NAMED_GROUP. +2003/07/14: [bug] add fetch_name() for USE_NAMED_SUBEXP off case. +2003/07/14: [API] add regex_number_of_names(). +2003/07/08: [impl] change error message for undefined group number call. + 'undefined group reference: /(a)\g<2>/' + --> 'undefined group <2> reference: /(a)\g<2>/' +2003/07/08: [dist] modify doc/RE. +2003/07/07: [impl] OP_SET_OPTION is not needed in compiled code. + add IS_DYNAMIC_OPTION() to regint.h. +2003/07/07: [spec] called group should not ignore outside option (?i:...). + ex. /(?i:(?(a)\2)){0}\g/.match("aA") + add opcode OP_BACKREFN_IC and OP_BACKREF_MULTI_IC. + set option status to effect memory in optimize_node_left(). +2003/07/07: [impl] add opcode OP_ANYCHAR_ML, OP_ANYCHAR_ML_STAR and + OP_ANYCHAR_ML_START_PEEK_NEXT. +2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1. +2003/07/07: [spec] rename REG_SYN_OP_QUOTE to REG_SYN_OP_ESC_Q_QUOTE. + 2003/07/04: Version 1.9.1 2003/07/04: [new] add REG_OPTION_CAPTURE_ONLY_NAMED_GROUP. (thanks .NET) 2003/07/04: [spec] check mbuf member in the case of REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC in parse_char_class(). -2003/07/04: [impl] typo REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED. +2003/07/04: [spec] typo REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED. should be REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED. 2003/07/04: [bug] conflict values on REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED and REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC. (thanks nobu) @@ -39,7 +389,7 @@ History 2003/06/20: Version 1.9.0 -2003/06/20: [spec] \Q...\E is not effective on REG_SYNTAX_RUBY. +2003/06/20: [spec] \Q...\E is not effective on REG_SYNTAX_RUBY. (thanks akr) 2003/06/19: [inst] rename regex.h to oniguruma.h. 2003/06/18: [impl] change REG_EXTERN setting condition. (__CYGWIN__) 2003/06/18: [bug] return wrong result UTF-8 case in regex_mb2wc(). @@ -55,7 +405,7 @@ History 2003/06/13: [bug] should use -DIMPORT for link with DLL in win32/Makefile. 
2003/06/13: [dist] add sample/names.c 2003/06/12: [bug] range should be from - 1 in not_wc_range_buf(). -2003/06/12: [spec] should warn for '-' befor '&&' operator in char-class. +2003/06/12: [spec] should warn for '-' before '&&' operator in char-class. 2003/06/12: [new] add REG_SYNTAX_PERL. 2003/06/12: [spec] add syntax behavior REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED. 2003/06/12: [spec] invalid POSIX bracket should be error. ex. [[:upper :]] @@ -507,6 +857,7 @@ History -- [bug: bug fix] +[API: API change/new/delete] [new: new feature] [spec: specification change] [impl: implementation change] @@ -515,3 +866,6 @@ History [dist: distribution change] [test: test] [memo: memo] +-- + +svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX" diff --git a/ext/mbstring/oniguruma/INSTALL-RUBY b/ext/mbstring/oniguruma/INSTALL-RUBY deleted file mode 100644 index ea214b6127..0000000000 --- a/ext/mbstring/oniguruma/INSTALL-RUBY +++ /dev/null @@ -1,48 +0,0 @@ -INSTALL-RUBY 2003/06/12 - -The way of installing into Ruby is shown. -First, prepare for the source of Ruby. -(http://www.ruby-lang.org/) - -A. Unix or Cygwin platform -B. Win32 platform (VC++) - - -A. Unix or Cygwin platform - - (in oniguruma directory) - 1. ./configure --with-rubydir= - 2. make 16 # for Ruby 1.6.8 - or - make 18 # for Ruby 1.8.0 - - Or you can specify ruby source directory. - (ex. make 16 RUBYDIR=../ruby) - - (in ruby source directory) - 3. ./configure (** If it doesn't go yet. **) - 4. make clean - 5. make - - - * test (ASCII and EUC-JP) - - (in oniguruma directory) - 6. make rtest - Or you can specify ruby program directory. - (ex. make rtest RUBYDIR=/usr/local/bin) - - -B. Win32 platform (VC++) - - * Requirement: Visual C++, patch.exe - - (in oniguruma directory) - 1. copy win32\Makefile Makefile - 2. nmake 16 RUBYDIR= # for Ruby 1.6.8 - or - nmake 18 RUBYDIR= # for Ruby 1.8.0 - - 3. Follow \win32\README.win32 description... 
- -// END diff --git a/ext/mbstring/oniguruma/Makefile.in b/ext/mbstring/oniguruma/Makefile.in deleted file mode 100644 index fd79cfb24f..0000000000 --- a/ext/mbstring/oniguruma/Makefile.in +++ /dev/null @@ -1,188 +0,0 @@ -# Oni Guruma Makefile - -product_name = oniguruma -dist_tag = `date '+%Y%m%d'` - -SHELL = /bin/sh -AUTOCONF = autoconf - -CPPFLAGS = -CFLAGS = @CFLAGS@ @STATISTICS@ -LDFLAGS = -LOADLIBES = -AR = ar -ARFLAGS = rc -RANLIB = @RANLIB@ -INSTALL = install -c -CP = cp -p -CC = @CC@ -DEFS = @DEFS@ -DNOT_RUBY -RUBYDIR = @RUBYDIR@ -WIN32 = win32 -DOC = doc - -srcdir = @srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ -libdir = $(exec_prefix)/lib -includedir = $(prefix)/include - -subdirs = - -libname = libonig.a - -onigintheaders = regint.h regparse.h -onigheaders = oniguruma.h $(onigintheaders) -posixheaders = onigposix.h -headers = $(posixheaders) $(onigheaders) - -onigobjs = regerror.o regparse.o regcomp.o regexec.o reggnu.o -posixobjs = regposix.o regposerr.o -libobjs = $(onigobjs) $(posixobjs) - -onigsources = regerror.c regparse.c regcomp.c regexec.c reggnu.c -posixsources = regposix.c regposerr.c -libsources = $(posixsources) $(onigsources) -rubysources = regex.c $(onigsources) - -patchfiles = re.c.168.patch re.c.180.patch -distfiles = README COPYING INSTALL-RUBY HISTORY \ - .cvsignore Makefile.in configure.in config.h.in configure \ - $(headers) $(libsources) regex.c $(patchfiles) \ - test.rb testconv.rb $(testc).c -win32distfiles = $(WIN32)/Makefile $(WIN32)/config.h $(WIN32)/testc.c -docfiles = $(DOC)/API $(DOC)/RE - -samplefiles = sample/*.c - -testc = testc -testp = testp - -makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' - -.SUFFIXES: -.SUFFIXES: .o .c .h .ps .dvi .info .texinfo - -.c.o: - $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) -I. 
-I$(srcdir) -c $< - -# targets -default: all - -all: $(libname) - -$(libname): $(libobjs) - rm -f $(libname) - $(AR) $(ARFLAGS) $(libname) $(libobjs) - $(RANLIB) $(libname) - -regparse.o: regparse.c $(onigheaders) config.h -regcomp.o: regcomp.c $(onigheaders) config.h -regexec.o: regexec.c regint.h oniguruma.h config.h -reggnu.o: reggnu.c regint.h oniguruma.h config.h -regerror.o: regerror.c regint.h oniguruma.h config.h -regposix.o: regposix.c $(posixheaders) oniguruma.h config.h -regposerr.o: regposerr.c $(posixheaders) config.h - -install: all - test -d $(libdir) || mkdir $(libdir) - test -d $(includedir) || mkdir $(includedir) - $(INSTALL) $(libname) $(libdir)/$(libname) - $(RANLIB) $(libdir)/$(libname) - $(INSTALL) $(srcdir)/oniguruma.h $(includedir)/oniguruma.h - $(INSTALL) $(srcdir)/onigposix.h $(includedir)/onigposix.h - -uninstall: - -rm -f $(libdir)/$(libname) - -rm -f $(includedir)/oniguruma.h - -# Ruby test -rtest: - $(RUBYDIR)/ruby -w -Ke test.rb - -# C library test -ctest: $(testc) - ./$(testc) - -# POSIX C library test -ptest: $(testp) - ./$(testp) - -$(testc): $(testc).c $(libname) - $(CC) $(CFLAGS) -o $@ $(testc).c $(libname) - -$(testp): $(testc).c $(libname) - $(CC) -DPOSIX_TEST $(CFLAGS) -o $@ $(testc).c $(libname) - -$(testc).c: test.rb testconv.rb - ruby -Ke testconv.rb < test.rb > $@ - -$(WIN32)/$(testc).c: test.rb testconv.rb - ruby -Ke testconv.rb -win < test.rb | nkf -cs > $@ - -clean: - rm -f *.o $(libname) $(testc) $(testp) $(testc) *~ win32/*~ - -distclean: clean - rm -f Makefile config.status - - -16: cpruby - patch -d $(RUBYDIR) -p0 < re.c.168.patch - -18: cpruby - patch -d $(RUBYDIR) -p0 < re.c.180.patch - -# backup file suffix -SORIG = ruby_orig - -cpruby: - $(CP) $(RUBYDIR)/regex.c $(RUBYDIR)/regex.c.$(SORIG) - $(CP) $(RUBYDIR)/regex.h $(RUBYDIR)/regex.h.$(SORIG) - $(CP) $(RUBYDIR)/re.c $(RUBYDIR)/re.c.$(SORIG) - $(CP) $(rubysources) $(onigintheaders) $(RUBYDIR) - $(CP) oniguruma.h $(RUBYDIR)/regex.h - -rback: - $(CP) 
$(RUBYDIR)/regex.c.$(SORIG) $(RUBYDIR)/regex.c - $(CP) $(RUBYDIR)/regex.h.$(SORIG) $(RUBYDIR)/regex.h - $(CP) $(RUBYDIR)/re.c.$(SORIG) $(RUBYDIR)/re.c - -samples: - $(CC) $(CFLAGS) -I. -o sample/simple sample/simple.c $(libname) - $(CC) $(CFLAGS) -I. -o sample/posix sample/posix.c $(libname) - $(CC) $(CFLAGS) -I. -o sample/names sample/names.c $(libname) - -configure: configure.in - $(AUTOCONF) - -config.status: configure - $(SHELL) ./config.status --recheck - -Makefile: Makefile.in config.status - $(SHELL) ./config.status - -# Prevent GNU make 3 from overflowing arg limit on system V. -.NOEXPORT: - -manifest: - for file in $(distfiles); do echo $$file; done - - -distdir = $(product_name) - -dist_auto: $(testc).c $(WIN32)/$(testc).c - -dist: configure dist_auto - rm -rf $(distdir) - mkdir $(distdir) - mkdir $(distdir)/$(DOC) - mkdir $(distdir)/$(WIN32) - mkdir $(distdir)/sample - ln $(distfiles) $(distdir) - ln $(docfiles) $(distdir)/$(DOC) - ln $(win32distfiles) $(distdir)/$(WIN32) - ln $(samplefiles) $(distdir)/sample - tar chf - $(distdir) | gzip > onigd$(dist_tag).tar.gz - rm -rf $(distdir) diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README index 130a6f7aa4..3880423f03 100644 --- a/ext/mbstring/oniguruma/README +++ b/ext/mbstring/oniguruma/README @@ -1,30 +1,43 @@ -README 2003/07/04 +README 2004/02/25 Oniguruma ---- (C) K.Kosako http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/ +http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ -Oniguruma is a regular expression library. -The characteristics of this library is that different character encodings +Oniguruma is a regular expressions library. +The characteristics of this library is that different character encoding for every regular expression object can be specified. 
-(Supported character encodings: ASCII, UTF-8, EUC-JP, Shift_JIS) + +Supported character encodings: + + ASCII, UTF-8, + EUC-JP, EUC-TW, EUC-KR, EUC-CN, + Shift_JIS, Big5, KOI8, KOI8-R, + ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, + ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, + ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 + There are two ways of using of it in this program. * Built-in regular expression engine of Ruby * C library (supported APIs: GNU regex, POSIX, Oniguruma native) +------------------------------------------------------------ Install -A. Install into Ruby +(A) Install into Ruby See INSTALL-RUBY. + (character encodings: ASCII, UTF-8, EUC-JP, Shift_JIS) + -B. C library +(B) Install C library - B1. Unix, Cygwin + (B-1) Unix and Cygwin platform 1. ./configure 2. make @@ -32,20 +45,20 @@ B. C library (* uninstall: make uninstall) - * test (EUC-JP) + * test (ASCII/EUC-JP) 4. make ctest - B2. Win32 platform (VC++) + (B-2) Win32 platform (VC++) - 1. copy win32\config.h config.h - 2. copy win32\Makefile Makefile + 1. copy win32\Makefile Makefile + 2. copy win32\config.h config.h 3. nmake - onig_s.lib: static link library - onig.dll: dynamic link library + onig_s.lib: static link library + onig.dll: dynamic link library - * test (Shift_JIS) + * test (ASCII/Shift_JIS) 4. copy win32\testc.c testc.c 5. nmake ctest @@ -58,30 +71,72 @@ License It follows the BSD license in the case of the one except for it. -Source Files - oniguruma.h Oniguruma and GNU regex API header file - regint.h internal definitions - regparse.h internal definitions for regparse.c and regcomp.c - regparse.c parsing functions. - regcomp.c compiling and optimization functions - regerror.c error message function - regex.c source files wrapper for Ruby - regexec.c search and match functions - reggnu.c GNU regex API functions +Regular Expressions + + See doc/RE (or doc/RE.ja for Japanese). 
+ - onigposix.h POSIX API header file - regposerr.c POSIX API error message function (regerror) - regposix.c POSIX API functions +Sample Programs sample/simple.c example of the minimum (native API) - sample/posix.c POSIX API sample. sample/names.c example of the named group callback. + sample/encode.c example of some encodings. + sample/listcap.c example of the capture history. + sample/posix.c POSIX API sample. + sample/sql.c example of the variable meta characters. + (SQL-like pattern matching) + +Source Files -Regular expression + oniguruma.h Oniguruma API header file. (public) + oniggnu.h GNU regex API header file. (public) + onigcmpt200.h Oniguruma API backward compatibility header file. (public) + (for 2.0.0 or older versions) + + regenc.h character encodings framework header file. + regint.h internal definitions + regparse.h internal definitions for regparse.c and regcomp.c + regcomp.c compiling and optimization functions + regenc.c character encodings framework. + regerror.c error message function + regex.c source files wrapper for Ruby + regexec.c search and match functions + regparse.c parsing functions. + reggnu.c GNU regex API functions + + onigposix.h POSIX API header file. (public) + regposerr.c POSIX error message function. + regposix.c POSIX functions. + + enc/mktable.c character type table generator. + enc/ascii.c ASCII encoding. + enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1) + enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2) + enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3) + enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4) + enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic) + enc/iso8859_6.c ISO-8859-6 encoding. (Arabic) + enc/iso8859_7.c ISO-8859-7 encoding. (Greek) + enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew) + enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish) + enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic) + enc/iso8859_11.c ISO-8859-11 encoding. (Thai) + enc/iso8859_13.c ISO-8859-13 encoding.
(Latin-7 or Baltic Rim) + enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic) + enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro) + enc/iso8859_16.c ISO-8859-16 encoding. + (Latin-10 or South-Eastern European with Euro) + enc/utf8.c UTF-8 encoding. + enc/euc_jp.c EUC-JP encoding. + enc/euc_tw.c EUC-TW encoding. + enc/euc_kr.c EUC-KR, EUC-CN encoding. + enc/sjis.c Shift_JIS encoding. + enc/koi8.c KOI8 encoding. + enc/koi8_r.c KOI8-R encoding. + enc/big5.c Big5 encoding. - See doc/RE. API differences with Japanized GNU regex(version 0.12) of Ruby @@ -93,22 +148,18 @@ API differences with Japanized GNU regex(version 0.12) of Ruby ToDo - 1 support 16-bit and 31-bit encodings. (UCS-2, UCS-4, UTF-16) - (each encoding has meta-character code table?) - - 2 if-then-else. (?(condition)then), (?(condition)then|else) + 1 support 16-bit encodings. (UTF-16) + 2 different encoding pattern with target. + (ex. ASCII/UTF-16, UTF-16 BE and UTF-16 LE) + 3 add enc/name.c (onigenc_get_enc_by_name(name)) - ? variable meta characters. - ? implement syntax behavior REG_SYN_CONTEXT_INDEP_ANCHORS. - ? pattern encoding different with target. - (ex. UCS-2 Big Endian and UCS-2 Little Endian) - ? better acess to hash table. + ? transmission stopper. (return ONIG_STOP from match_at()) + ? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS. + ? better acess to hash table (st.c). non null-terminated key version st_lookup(). - (but it needs to modify st.[ch]) - ? character set specific POSIX bracket extensions. ([:hiragana:]) - ? grep-like tool 'onigrep'. (variable syntax option etc..) - ? check invalid wide char value in WC2MB, WC2MB_FIRST on Ruby M17N. - ? define THREAD_PASS in regint.h as rb_thread_pass(). - + ? grep-like tool 'onigrep'. + ? return parse tree of regexp pattern to application. + ?? /a{n}?/ should be interpreted as /(?:a{n})?/. + ?? \h hexadecimal digit char ([0-9a-fA-F]), \H not \h. and I'm thankful to Akinori MUSHA. 
diff --git a/ext/mbstring/oniguruma/configure b/ext/mbstring/oniguruma/configure new file mode 100755 index 0000000000..a63327d854 --- /dev/null +++ b/ext/mbstring/oniguruma/configure @@ -0,0 +1,5414 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.53. +# +# Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + + +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi + +# NLS nuisances. +# Support unset when possible. 
+if (FOO=FOO; unset FOO) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + +(set +x; test -n "`(LANG=C; export LANG) 2>&1`") && + { $as_unset LANG || test "${LANG+set}" != set; } || + { LANG=C; export LANG; } +(set +x; test -n "`(LC_ALL=C; export LC_ALL) 2>&1`") && + { $as_unset LC_ALL || test "${LC_ALL+set}" != set; } || + { LC_ALL=C; export LC_ALL; } +(set +x; test -n "`(LC_TIME=C; export LC_TIME) 2>&1`") && + { $as_unset LC_TIME || test "${LC_TIME+set}" != set; } || + { LC_TIME=C; export LC_TIME; } +(set +x; test -n "`(LC_CTYPE=C; export LC_CTYPE) 2>&1`") && + { $as_unset LC_CTYPE || test "${LC_CTYPE+set}" != set; } || + { LC_CTYPE=C; export LC_CTYPE; } +(set +x; test -n "`(LANGUAGE=C; export LANGUAGE) 2>&1`") && + { $as_unset LANGUAGE || test "${LANGUAGE+set}" != set; } || + { LANGUAGE=C; export LANGUAGE; } +(set +x; test -n "`(LC_COLLATE=C; export LC_COLLATE) 2>&1`") && + { $as_unset LC_COLLATE || test "${LC_COLLATE+set}" != set; } || + { LC_COLLATE=C; export LC_COLLATE; } +(set +x; test -n "`(LC_NUMERIC=C; export LC_NUMERIC) 2>&1`") && + { $as_unset LC_NUMERIC || test "${LC_NUMERIC+set}" != set; } || + { LC_NUMERIC=C; export LC_NUMERIC; } +(set +x; test -n "`(LC_MESSAGES=C; export LC_MESSAGES) 2>&1`") && + { $as_unset LC_MESSAGES || test "${LC_MESSAGES+set}" != set; } || + { LC_MESSAGES=C; export LC_MESSAGES; } + + +# Name of the executable. +as_me=`(basename "$0") 2>/dev/null || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conftest.sh + echo "exit 0" >>conftest.sh + chmod +x conftest.sh + if (PATH=".;."; conftest.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conftest.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2 + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. 
+ exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="sed y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="sed y%*+%pp%;s%[^_$as_cr_alnum]%_%g" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH || test "${CDPATH+set}" != set || { CDPATH=$PATH_SEPARATOR; export CDPATH; } + + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +exec 6>&1 + +# +# Initializations. +# +ac_default_prefix=/usr/local +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Maximum number of lines to put in a shell here document. +# This variable seems obsolete. It should probably be removed, and +# only ac_max_sed_lines should be used. +: ${ac_max_here_lines=38} + +# Identity of this package. 
+PACKAGE_NAME= +PACKAGE_TARNAME= +PACKAGE_VERSION= +PACKAGE_STRING= +PACKAGE_BUGREPORT= + +ac_unique_file="regex.c" +# Factoring default headers for most tests. +ac_includes_default="\ +#include +#if HAVE_SYS_TYPES_H +# include +#endif +#if HAVE_SYS_STAT_H +# include +#endif +#if STDC_HEADERS +# include +# include +#else +# if HAVE_STDLIB_H +# include +# endif +#endif +#if HAVE_STRING_H +# if !STDC_HEADERS && HAVE_MEMORY_H +# include +# endif +# include +#endif +#if HAVE_STRINGS_H +# include +#endif +#if HAVE_INTTYPES_H +# include +#else +# if HAVE_STDINT_H +# include +# endif +#endif +#if HAVE_UNISTD_H +# include +#endif" + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datadir='${prefix}/share' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' +includedir='${prefix}/include' +oldincludedir='/usr/include' +infodir='${prefix}/info' +mandir='${prefix}/man' + +ac_prev= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval "$ac_prev=\$ac_option" + ac_prev= + continue + fi + + ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_option in + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) + datadir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + eval "enable_$ac_feature=no" ;; + + -enable-* | --enable-*) + ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "enable_$ac_feature='$ac_optarg'" ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | 
--libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst \ + | --locals | --local | --loca | --loc | --lo) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + 
-program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + 
site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package| sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "with_$ac_package='$ac_optarg'" ;; + + -without-* | --without-*) + ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/-/_/g'` + eval "with_$ac_package=no" ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` + eval "$ac_envvar='$ac_optarg'" + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +# Be sure to have absolute paths. 
+for ac_var in exec_prefix prefix +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* | NONE | '' ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# Be sure to have absolute paths. +for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \ + localstatedir libdir includedir oldincludedir infodir mandir +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then its parent. + ac_confdir=`(dirname "$0") 2>/dev/null || +$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$0" : 'X\(//\)[^/]' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$0" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r $srcdir/$ac_unique_file; then + srcdir=.. 
+ fi +else + ac_srcdir_defaulted=no +fi +if test ! -r $srcdir/$ac_unique_file; then + if test "$ac_srcdir_defaulted" = yes; then + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2 + { (exit 1); exit 1; }; } + else + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } + fi +fi +srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'` +ac_env_build_alias_set=${build_alias+set} +ac_env_build_alias_value=$build_alias +ac_cv_env_build_alias_set=${build_alias+set} +ac_cv_env_build_alias_value=$build_alias +ac_env_host_alias_set=${host_alias+set} +ac_env_host_alias_value=$host_alias +ac_cv_env_host_alias_set=${host_alias+set} +ac_cv_env_host_alias_value=$host_alias +ac_env_target_alias_set=${target_alias+set} +ac_env_target_alias_value=$target_alias +ac_cv_env_target_alias_set=${target_alias+set} +ac_cv_env_target_alias_value=$target_alias +ac_env_CC_set=${CC+set} +ac_env_CC_value=$CC +ac_cv_env_CC_set=${CC+set} +ac_cv_env_CC_value=$CC +ac_env_CFLAGS_set=${CFLAGS+set} +ac_env_CFLAGS_value=$CFLAGS +ac_cv_env_CFLAGS_set=${CFLAGS+set} +ac_cv_env_CFLAGS_value=$CFLAGS +ac_env_LDFLAGS_set=${LDFLAGS+set} +ac_env_LDFLAGS_value=$LDFLAGS +ac_cv_env_LDFLAGS_set=${LDFLAGS+set} +ac_cv_env_LDFLAGS_value=$LDFLAGS +ac_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_env_CPPFLAGS_value=$CPPFLAGS +ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_cv_env_CPPFLAGS_value=$CPPFLAGS +ac_env_CPP_set=${CPP+set} +ac_env_CPP_value=$CPP +ac_cv_env_CPP_set=${CPP+set} +ac_cv_env_CPP_value=$CPP + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures this package to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. 
See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +_ACEOF + + cat <<_ACEOF +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --datadir=DIR read-only architecture-independent data [PREFIX/share] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --infodir=DIR info documentation [PREFIX/info] + --mandir=DIR man documentation [PREFIX/man] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + + cat <<\_ACEOF + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-rubydir=RUBYDIR specify value for RUBYDIR (default ..) + --with-statistics take matching time statistical data + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + CPPFLAGS C/C++ preprocessor flags, e.g. -I if you have + headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +_ACEOF +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + ac_popdir=`pwd` + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d $ac_dir || continue + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. 
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac +# Don't blindly perform a `cd "$ac_dir"/$ac_foo && pwd` since $ac_foo can be +# absolute. +ac_abs_builddir=`cd "$ac_dir" && cd $ac_builddir && pwd` +ac_abs_top_builddir=`cd "$ac_dir" && cd $ac_top_builddir && pwd` +ac_abs_srcdir=`cd "$ac_dir" && cd $ac_srcdir && pwd` +ac_abs_top_srcdir=`cd "$ac_dir" && cd $ac_top_srcdir && pwd` + + cd $ac_dir + # Check for guested configure; otherwise get Cygnus style configure. + if test -f $ac_srcdir/configure.gnu; then + echo + $SHELL $ac_srcdir/configure.gnu --help=recursive + elif test -f $ac_srcdir/configure; then + echo + $SHELL $ac_srcdir/configure --help=recursive + elif test -f $ac_srcdir/configure.ac || + test -f $ac_srcdir/configure.in; then + echo + $ac_configure --help + else + echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi + cd $ac_popdir + done +fi + +test -n "$ac_init_help" && exit 0 +if $ac_init_version; then + cat <<\_ACEOF + +Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002 +Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit 0 +fi +exec 5>config.log +cat >&5 <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by $as_me, which was +generated by GNU Autoconf 2.53. 
Invocation command line was + + $ $0 $@ + +_ACEOF +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +hostinfo = `(hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + echo "PATH: $as_dir" +done + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Also quote any args containing shell meta-characters. +ac_configure_args= +ac_sep= +for ac_arg +do + case $ac_arg in + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n ) continue ;; + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + continue ;; + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
+ *) ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'" + ac_sep=" " ;; + esac + # Get rid of the leading space. +done + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Be sure not to use single quotes in there, as some shells, +# such as our DU 5.0 friend, will then `close' the trap. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + cat <<\_ASBOX +## ---------------- ## +## Cache variables. ## +## ---------------- ## +_ASBOX + echo + # The following way of writing the cache mishandles newlines in values, +{ + (set) 2>&1 | + case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in + *ac_space=\ *) + sed -n \ + "s/'"'"'/'"'"'\\\\'"'"''"'"'/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p" + ;; + *) + sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} + echo + if test -s confdefs.h; then + cat <<\_ASBOX +## ----------- ## +## confdefs.h. ## +## ----------- ## +_ASBOX + echo + sed "/^$/d" confdefs.h + echo + fi + test "$ac_signal" != 0 && + echo "$as_me: caught signal $ac_signal" + echo "$as_me: exit $exit_status" + } >&5 + rm -f core core.* *.core && + rm -rf conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status + ' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -rf conftest* confdefs.h +# AIX cpp loses on an empty file, so make sure it contains at least a newline. +echo >confdefs.h + +# Predefined preprocessor variables. 
+ +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer explicitly selected file to automatically selected ones. +if test -z "$CONFIG_SITE"; then + if test "x$prefix" != xNONE; then + CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" + else + CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" + fi +fi +for ac_site_file in $CONFIG_SITE; do + if test -r "$ac_site_file"; then + { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 +echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special + # files actually), so we avoid doing that. + if test -f "$cache_file"; then + { echo "$as_me:$LINENO: loading cache $cache_file" >&5 +echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . $cache_file;; + *) . ./$cache_file;; + esac + fi +else + { echo "$as_me:$LINENO: creating cache $cache_file" >&5 +echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in `(set) 2>&1 | + sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val="\$ac_cv_env_${ac_var}_value" + eval ac_new_val="\$ac_env_${ac_var}_value" + case $ac_old_set,$ac_new_set in + set,) + { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 +echo "$as_me: former value: $ac_old_val" >&2;} + { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 +echo "$as_me: current value: $ac_new_val" >&2;} + ac_cache_corrupted=: + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + + + + + + + + + + + + + +ac_config_headers="$ac_config_headers config.h" + + +RUBYDIR=".." + +# Check whether --with-rubydir or --without-rubydir was given. +if test "${with_rubydir+set}" = set; then + withval="$with_rubydir" + RUBYDIR=$withval +fi; + + +STATISTICS="" + +# Check whether --with-statistics or --without-statistics was given. +if test "${with_statistics+set}" = set; then + withval="$with_statistics" + STATISTICS=-DREG_DEBUG_STATISTICS +fi; + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + set dummy "$as_dir/$ac_word" ${1+"$@"} + shift + ac_cv_prog_CC="$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$ac_ct_CC" && break +done + + CC=$ac_ct_CC +fi + +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH" >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH" >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. 
+echo "$as_me:$LINENO:" \ + "checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5 + (eval $ac_compiler --version </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5 + (eval $ac_compiler -v </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5 + (eval $ac_compiler -V </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.exe" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +echo "$as_me:$LINENO: checking for C compiler default output" >&5 +echo $ECHO_N "checking for C compiler default output... $ECHO_C" >&6 +ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5 + (eval $ac_link_default) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Find the output, starting from the most likely. This scheme is +# not robust to junk in `.', hence go to wildcards (a.*) only as a last +# resort. + +# Be careful to initialize this variable, since it used to be cached. +# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile.
+ac_cv_exeext= +for ac_file in `ls a_out.exe a.exe conftest.exe 2>/dev/null; + ls a.out conftest 2>/dev/null; + ls a.* conftest.* 2>/dev/null`; do + case $ac_file in + *.$ac_ext | *.o | *.obj | *.xcoff | *.tds | *.d | *.pdb | *.xSYM ) ;; + a.out ) # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + # FIXME: I believe we export ac_cv_exeext for Libtool --akim. + export ac_cv_exeext + break;; + * ) break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +{ { echo "$as_me:$LINENO: error: C compiler cannot create executables" >&5 +echo "$as_me: error: C compiler cannot create executables" >&2;} + { (exit 77); exit 77; }; } +fi + +ac_exeext=$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6 + +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6 +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'." >&5 +echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'." 
>&2;} + { (exit 1); exit 1; }; } + fi + fi +fi +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +rm -f a.out a.exe conftest$ac_cv_exeext +ac_clean_files=$ac_clean_files_save +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6 +echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6 + +echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in `(ls conftest.exe; ls conftest; ls conftest.*) 2>/dev/null`; do + case $ac_file in + *.$ac_ext | *.o | *.obj | *.xcoff | *.tds | *.d | *.pdb ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + export ac_cv_exeext + break;; + * ) break;; + esac +done +else + { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link" >&5 +echo "$as_me: error: cannot compute suffix of executables: cannot compile and link" >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6 + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... 
$ECHO_C" >&6 +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile" >&5 +echo "$as_me: error: cannot compute suffix of object files: cannot compile" >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6 +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6 +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_compiler_gnu=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6 +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +CFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_cv_prog_cc_g=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6 +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +# Some people use a C++ compiler to compile C. Since we use `exit', +# in C++ we need to declare it. In case someone uses the same compiler +# for both compiling C and C++ we need to have the C++ compiler decide +# the declaration of exit, since it's the most demanding environment. +cat >conftest.$ac_ext <<_ACEOF +#ifndef __cplusplus + choke me +#endif +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + for ac_declaration in \ + ''\ + '#include <stdlib.h>' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#include <stdlib.h> +$ac_declaration +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +continue +fi +rm -f conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_declaration +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +fi +rm -f conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +fi +rm -f conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 +if test "${ac_cv_prog_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + echo "$as_me:$LINENO: result: $RANLIB" >&5 +echo "${ECHO_T}$RANLIB" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + test -z "$ac_cv_prog_ac_ct_RANLIB" && ac_cv_prog_ac_ct_RANLIB=":" +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5 +echo "${ECHO_T}$ac_ct_RANLIB" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + RANLIB=$ac_ct_RANLIB +else + RANLIB="$ac_cv_prog_RANLIB" +fi + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6 +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#include + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + egrep -v '^ *\+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 + cat conftest.$ac_ext >&5 + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#include <ac_nonexistent.h> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + egrep -v '^ *\+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 + cat conftest.$ac_ext >&5 + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +echo "$as_me:$LINENO: result: $CPP" >&5 +echo "${ECHO_T}$CPP" >&6 +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#include + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + egrep -v '^ *\+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 + cat conftest.$ac_ext >&5 + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + egrep -v '^ *\+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 + cat conftest.$ac_ext >&5 + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
+rm -f conftest.err conftest.$ac_ext
+# Final verdict of the preprocessor sanity probes: abort configure with an
+# error (logged to fd 5 and printed on stderr) unless $ac_preproc_ok is true.
+if $ac_preproc_ok; then
+  :
+else
+  { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check" >&5
+echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check" >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+# (Re)select C as the probe language: source extension plus the
+# preprocess / compile / link command templates used by the checks below.
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+# ANSI C header check.  Result is kept in ac_cv_header_stdc (reused when it
+# is already set); STDC_HEADERS is appended to confdefs.h when it is "yes".
+# Steps, each of which can downgrade the result to "no":
+#   1. the four standard headers must preprocess without diagnostics;
+#   2. the preprocessed <string.h> must mention memchr;
+#   3. the preprocessed <stdlib.h> must mention free;
+#   4. unless cross compiling, a small program comparing islower()/toupper()
+#      with reference macros over 0..255 must link and exit 0.
+echo "$as_me:$LINENO: checking for ANSI C header files" >&5
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6
+if test "${ac_cv_header_stdc+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+_ACEOF
+# Preprocess only; stderr is filtered (lines starting with '+' are dropped)
+# into conftest.err so that remaining output can be treated as a failure.
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+  (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+  ac_status=$?
+  egrep -v '^ *\+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  ac_cv_header_stdc=yes
+else
+  echo "$as_me: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  ac_cv_header_stdc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  egrep "memchr" >/dev/null 2>&1; then
+  :
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  egrep "free" >/dev/null 2>&1; then
+  :
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  # The probe below must be executed, so it is skipped when cross compiling.
+  # It calls exit(), hence <stdlib.h> (already validated above) is included
+  # next to <ctype.h> so the call is not an implicit declaration.
+  if test "$cross_compiling" = yes; then
+  :
+else
+  cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) (('a' <= (c) && (c) <= 'i') \
+                     || ('j' <= (c) && (c) <= 'r') \
+                     || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+        || toupper (i) != TOUPPER (i))
+      exit(2);
+  exit (0);
+}
+_ACEOF
+rm -f conftest$ac_exeext
+# Link the probe, then run it; both steps must succeed.
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  :
+else
+  echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+( exit $ac_status )
+ac_cv_header_stdc=no
+fi
+rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6
+if test $ac_cv_header_stdc = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+ + + + + + + + + +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +eval "$as_ac_Header=no" +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + + + + + +for ac_header in stdlib.h string.h strings.h sys/time.h unistd.h sys/times.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? 
+echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_header_compiler=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#include <$ac_header> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + egrep -v '^ *\+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 + cat conftest.$ac_ext >&5 + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? 
+case $ac_header_compiler:$ac_header_preproc in + yes:no ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;};; + no:yes ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;};; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +echo "$as_me:$LINENO: checking for int" >&5 +echo $ECHO_N "checking for int... 
$ECHO_C" >&6 +if test "${ac_cv_type_int+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +if ((int *) 0) + return 0; +if (sizeof (int)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_int=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_cv_type_int=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_int" >&5 +echo "${ECHO_T}$ac_cv_type_int" >&6 + +echo "$as_me:$LINENO: checking size of int" >&5 +echo $ECHO_N "checking size of int... $ECHO_C" >&6 +if test "${ac_cv_sizeof_int+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$ac_cv_type_int" = yes; then + # The cast to unsigned long works around a bug in the HP C Compiler + # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects + # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. + # This bug is HP SR number 8606223364. + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. 
+cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (int))) >= 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=0 ac_mid=0 + while :; do + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (int))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_lo=`expr $ac_mid + 1` + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid + 1` +fi +rm -f conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (int))) < 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=-1 ac_mid=-1 + while :; do + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (int))) >= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_hi=`expr '(' $ac_mid ')' - 1` + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid` +fi +rm -f conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_lo= ac_hi= +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.$ac_objext conftest.$ac_ext +# Binary search between lo and hi bounds. +while test "x$ac_lo" != "x$ac_hi"; do + ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo` + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (int))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_lo=`expr '(' $ac_mid ')' + 1` +fi +rm -f conftest.$ac_objext conftest.$ac_ext +done +case $ac_lo in +?*) ac_cv_sizeof_int=$ac_lo;; +'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (int), 77" >&5 +echo "$as_me: error: cannot compute sizeof (int), 77" >&2;} + { (exit 1); exit 1; }; } ;; +esac +else + if test "$cross_compiling" = yes; then + { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling" >&5 +echo "$as_me: error: cannot run test program while cross compiling" >&2;} + { (exit 1); exit 1; }; } +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +long longval () { return (long) (sizeof (int)); } +unsigned long ulongval () { return (long) (sizeof (int)); } +#include +#include +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! f) + exit (1); + if (((long) (sizeof (int))) < 0) + { + long i = longval (); + if (i != ((long) (sizeof (int)))) + exit (1); + fprintf (f, "%ld\n", i); + } + else + { + unsigned long i = ulongval (); + if (i != ((long) (sizeof (int)))) + exit (1); + fprintf (f, "%lu\n", i); + } + exit (ferror (f) || fclose (f) != 0); + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_sizeof_int=`cat conftest.val` +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +( exit $ac_status ) +{ { echo "$as_me:$LINENO: error: cannot compute sizeof (int), 77" >&5 +echo "$as_me: error: cannot compute sizeof (int), 77" >&2;} + { (exit 1); exit 1; }; } +fi +rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +rm -f conftest.val +else + ac_cv_sizeof_int=0 +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_sizeof_int" >&5 +echo "${ECHO_T}$ac_cv_sizeof_int" >&6 +cat >>confdefs.h <<_ACEOF +#define SIZEOF_INT $ac_cv_sizeof_int +_ACEOF + + +echo "$as_me:$LINENO: checking for short" >&5 +echo $ECHO_N "checking for short... $ECHO_C" >&6 +if test "${ac_cv_type_short+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +if ((short *) 0) + return 0; +if (sizeof (short)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_short=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_cv_type_short=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_short" >&5 +echo "${ECHO_T}$ac_cv_type_short" >&6 + +echo "$as_me:$LINENO: checking size of short" >&5 +echo $ECHO_N "checking size of short... 
$ECHO_C" >&6 +if test "${ac_cv_sizeof_short+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$ac_cv_type_short" = yes; then + # The cast to unsigned long works around a bug in the HP C Compiler + # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects + # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. + # This bug is HP SR number 8606223364. + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. +cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (short))) >= 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=0 ac_mid=0 + while :; do + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (short))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_lo=`expr $ac_mid + 1` + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid + 1` +fi +rm -f conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (short))) < 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=-1 ac_mid=-1 + while :; do + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (short))) >= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_hi=`expr '(' $ac_mid ')' - 1` + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid` +fi +rm -f conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_lo= ac_hi= +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.$ac_objext conftest.$ac_ext +# Binary search between lo and hi bounds. +while test "x$ac_lo" != "x$ac_hi"; do + ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo` + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (short))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_lo=`expr '(' $ac_mid ')' + 1` +fi +rm -f conftest.$ac_objext conftest.$ac_ext +done +case $ac_lo in +?*) ac_cv_sizeof_short=$ac_lo;; +'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (short), 77" >&5 +echo "$as_me: error: cannot compute sizeof (short), 77" >&2;} + { (exit 1); exit 1; }; } ;; +esac +else + if test "$cross_compiling" = yes; then + { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling" >&5 +echo "$as_me: error: cannot run test program while cross compiling" >&2;} + { (exit 1); exit 1; }; } +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +long longval () { return (long) (sizeof (short)); } +unsigned long ulongval () { return (long) (sizeof (short)); } +#include +#include +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! f) + exit (1); + if (((long) (sizeof (short))) < 0) + { + long i = longval (); + if (i != ((long) (sizeof (short)))) + exit (1); + fprintf (f, "%ld\n", i); + } + else + { + unsigned long i = ulongval (); + if (i != ((long) (sizeof (short)))) + exit (1); + fprintf (f, "%lu\n", i); + } + exit (ferror (f) || fclose (f) != 0); + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_sizeof_short=`cat conftest.val` +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +( exit $ac_status ) +{ { echo "$as_me:$LINENO: error: cannot compute sizeof (short), 77" >&5 +echo "$as_me: error: cannot compute sizeof (short), 77" >&2;} + { (exit 1); exit 1; }; } +fi +rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +rm -f conftest.val +else + ac_cv_sizeof_short=0 +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_sizeof_short" >&5 +echo "${ECHO_T}$ac_cv_sizeof_short" >&6 +cat >>confdefs.h <<_ACEOF +#define SIZEOF_SHORT $ac_cv_sizeof_short +_ACEOF + + +echo "$as_me:$LINENO: checking for long" >&5 +echo $ECHO_N "checking for long... $ECHO_C" >&6 +if test "${ac_cv_type_long+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +if ((long *) 0) + return 0; +if (sizeof (long)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_long=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_cv_type_long=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_long" >&5 +echo "${ECHO_T}$ac_cv_type_long" >&6 + +echo "$as_me:$LINENO: checking size of long" >&5 +echo $ECHO_N "checking size of long... 
$ECHO_C" >&6 +if test "${ac_cv_sizeof_long+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$ac_cv_type_long" = yes; then + # The cast to unsigned long works around a bug in the HP C Compiler + # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects + # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. + # This bug is HP SR number 8606223364. + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. +cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long))) >= 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=0 ac_mid=0 + while :; do + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_lo=`expr $ac_mid + 1` + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid + 1` +fi +rm -f conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long))) < 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=-1 ac_mid=-1 + while :; do + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long))) >= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_lo=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_hi=`expr '(' $ac_mid ')' - 1` + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid` +fi +rm -f conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_lo= ac_hi= +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.$ac_objext conftest.$ac_ext +# Binary search between lo and hi bounds. +while test "x$ac_lo" != "x$ac_hi"; do + ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo` + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +static int test_array [1 - 2 * !(((long) (sizeof (long))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_hi=$ac_mid +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_lo=`expr '(' $ac_mid ')' + 1` +fi +rm -f conftest.$ac_objext conftest.$ac_ext +done +case $ac_lo in +?*) ac_cv_sizeof_long=$ac_lo;; +'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (long), 77" >&5 +echo "$as_me: error: cannot compute sizeof (long), 77" >&2;} + { (exit 1); exit 1; }; } ;; +esac +else + if test "$cross_compiling" = yes; then + { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling" >&5 +echo "$as_me: error: cannot run test program while cross compiling" >&2;} + { (exit 1); exit 1; }; } +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +$ac_includes_default +long longval () { return (long) (sizeof (long)); } +unsigned long ulongval () { return (long) (sizeof (long)); } +#include +#include +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! f) + exit (1); + if (((long) (sizeof (long))) < 0) + { + long i = longval (); + if (i != ((long) (sizeof (long)))) + exit (1); + fprintf (f, "%ld\n", i); + } + else + { + unsigned long i = ulongval (); + if (i != ((long) (sizeof (long)))) + exit (1); + fprintf (f, "%lu\n", i); + } + exit (ferror (f) || fclose (f) != 0); + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_sizeof_long=`cat conftest.val` +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +( exit $ac_status ) +{ { echo "$as_me:$LINENO: error: cannot compute sizeof (long), 77" >&5 +echo "$as_me: error: cannot compute sizeof (long), 77" >&2;} + { (exit 1); exit 1; }; } +fi +rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +rm -f conftest.val +else + ac_cv_sizeof_long=0 +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_sizeof_long" >&5 +echo "${ECHO_T}$ac_cv_sizeof_long" >&6 +cat >>confdefs.h <<_ACEOF +#define SIZEOF_LONG $ac_cv_sizeof_long +_ACEOF + + +echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5 +echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_stdc=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) 
+{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +# Don't try gcc -ansi; that turns off useful extensions and +# breaks some systems' header files. +# AIX -qlanglvl=ansi +# Ultrix and OSF/1 -std1 +# HP-UX 10.20 and later -Ae +# HP-UX older versions -Aa -D_HPUX_SOURCE +# SVR4 -Xc -D__EXTENSIONS__ +for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_stdc=$ac_arg +break +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +fi +rm -f conftest.$ac_objext +done +rm -f conftest.$ac_ext conftest.$ac_objext +CC=$ac_save_CC + +fi + +case "x$ac_cv_prog_cc_stdc" in + x|xno) + echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6 ;; + *) + echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6 + CC="$CC $ac_cv_prog_cc_stdc" ;; +esac + +echo "$as_me:$LINENO: checking for an ANSI C-conforming const" >&5 +echo $ECHO_N "checking for an ANSI C-conforming const... 
$ECHO_C" >&6 +if test "${ac_cv_c_const+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +/* FIXME: Include the comments suggested by Paul. */ +#ifndef __cplusplus + /* Ultrix mips cc rejects this. */ + typedef int charset[2]; + const charset x; + /* SunOS 4.1.1 cc rejects this. */ + char const *const *ccp; + char **p; + /* NEC SVR4.0.2 mips cc rejects this. */ + struct point {int x, y;}; + static struct point const zero = {0,0}; + /* AIX XL C 1.02.0.0 rejects this. + It does not let you subtract one const X* pointer from another in + an arm of an if-expression whose if-part is not a constant + expression */ + const char *g = "string"; + ccp = &g + (g ? g-g : 0); + /* HPUX 7.0 cc rejects these. */ + ++ccp; + p = (char**) ccp; + ccp = (char const *const *) p; + { /* SCO 3.2v4 cc rejects this. */ + char *t; + char const *s = 0 ? (char *) 0 : (char const *) 0; + + *t++ = 0; + } + { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ + int x[] = {25, 17}; + const int *foo = &x[0]; + ++foo; + } + { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ + typedef const int *iptr; + iptr p = 0; + ++p; + } + { /* AIX XL C 1.02.0.0 rejects this saying + "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ + struct s { int j; const int *ap[3]; }; + struct s *b; b->j = 5; + } + { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ + const int foo = 10; + } +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_c_const=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_cv_c_const=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_c_const" >&5 +echo "${ECHO_T}$ac_cv_c_const" >&6 +if test $ac_cv_c_const = no; then + +cat >>confdefs.h <<\_ACEOF +#define const +_ACEOF + +fi + +echo "$as_me:$LINENO: checking whether time.h and sys/time.h may both be included" >&5 +echo $ECHO_N "checking whether time.h and sys/time.h may both be included... $ECHO_C" >&6 +if test "${ac_cv_header_time+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#include +#include +#include + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +if ((struct tm *) 0) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_header_time=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_cv_header_time=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_header_time" >&5 +echo "${ECHO_T}$ac_cv_header_time" >&6 +if test $ac_cv_header_time = yes; then + +cat >>confdefs.h <<\_ACEOF +#define TIME_WITH_SYS_TIME 1 +_ACEOF + +fi + + +# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works +# for constant arguments. Useless! +echo "$as_me:$LINENO: checking for working alloca.h" >&5 +echo $ECHO_N "checking for working alloca.h... 
$ECHO_C" >&6 +if test "${ac_cv_working_alloca_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#include +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +char *p = (char *) alloca (2 * sizeof (int)); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_working_alloca_h=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_cv_working_alloca_h=no +fi +rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_working_alloca_h" >&5 +echo "${ECHO_T}$ac_cv_working_alloca_h" >&6 +if test $ac_cv_working_alloca_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ALLOCA_H 1 +_ACEOF + +fi + +echo "$as_me:$LINENO: checking for alloca" >&5 +echo $ECHO_N "checking for alloca... 
$ECHO_C" >&6 +if test "${ac_cv_func_alloca_works+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#ifdef __GNUC__ +# define alloca __builtin_alloca +#else +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# if HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +# endif +#endif + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +char *p = (char *) alloca (1); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_alloca_works=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +ac_cv_func_alloca_works=no +fi +rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_func_alloca_works" >&5 +echo "${ECHO_T}$ac_cv_func_alloca_works" >&6 + +if test $ac_cv_func_alloca_works = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ALLOCA 1 +_ACEOF + +else + # The SVR3 libPW and SVR4 libucb both contain incompatible functions +# that cause trouble. Some versions do not even contain alloca or +# contain a buggy version. If you still want to use their alloca, +# use ar to extract alloca.o from them instead of compiling alloca.c. 
+ +ALLOCA=alloca.$ac_objext + +cat >>confdefs.h <<\_ACEOF +#define C_ALLOCA 1 +_ACEOF + + +echo "$as_me:$LINENO: checking whether \`alloca.c' needs Cray hooks" >&5 +echo $ECHO_N "checking whether \`alloca.c' needs Cray hooks... $ECHO_C" >&6 +if test "${ac_cv_os_cray+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +#if defined(CRAY) && ! defined(CRAY2) +webecray +#else +wenotbecray +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "webecray" >/dev/null 2>&1; then + ac_cv_os_cray=yes +else + ac_cv_os_cray=no +fi +rm -f conftest* + +fi +echo "$as_me:$LINENO: result: $ac_cv_os_cray" >&5 +echo "${ECHO_T}$ac_cv_os_cray" >&6 +if test $ac_cv_os_cray = yes; then + for ac_func in _getb67 GETB67 getb67; do + as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. */ +#include +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char $ac_func (); +char (*f) (); + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) +choke me +#else +f = $ac_func; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +eval "$as_ac_var=no" +fi +rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 +if test `eval echo '${'$as_ac_var'}'` = yes; then + +cat >>confdefs.h <<_ACEOF +#define CRAY_STACKSEG_END $ac_func +_ACEOF + + break +fi + + done +fi + +echo "$as_me:$LINENO: checking stack direction for C alloca" >&5 +echo $ECHO_N "checking stack direction for C alloca... $ECHO_C" >&6 +if test "${ac_cv_c_stack_direction+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$cross_compiling" = yes; then + ac_cv_c_stack_direction=0 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +int +find_stack_direction () +{ + static char *addr = 0; + auto char dummy; + if (addr == 0) + { + addr = &dummy; + return find_stack_direction (); + } + else + return (&dummy > addr) ? 1 : -1; +} + +int +main () +{ + exit (find_stack_direction () < 0); +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_c_stack_direction=1 +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +( exit $ac_status ) +ac_cv_c_stack_direction=-1 +fi +rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_c_stack_direction" >&5 +echo "${ECHO_T}$ac_cv_c_stack_direction" >&6 + +cat >>confdefs.h <<_ACEOF +#define STACK_DIRECTION $ac_cv_c_stack_direction +_ACEOF + + +fi + +echo "$as_me:$LINENO: checking for working memcmp" >&5 +echo $ECHO_N "checking for working memcmp... $ECHO_C" >&6 +if test "${ac_cv_func_memcmp_working+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$cross_compiling" = yes; then + ac_cv_func_memcmp_working=no +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ + + /* Some versions of memcmp are not 8-bit clean. */ + char c0 = 0x40, c1 = 0x80, c2 = 0x81; + if (memcmp(&c0, &c2, 1) >= 0 || memcmp(&c1, &c2, 1) >= 0) + exit (1); + + /* The Next x86 OpenStep bug shows up only when comparing 16 bytes + or more and with at least one buffer not starting on a 4-byte boundary. + William Lewis provided this test program. */ + { + char foo[21]; + char bar[21]; + int i; + for (i = 0; i < 4; i++) + { + char *a = foo + i; + char *b = bar + i; + strcpy (a, "--------01111111"); + strcpy (b, "--------10000000"); + if (memcmp (a, b, 16) >= 0) + exit (1); + } + exit (0); + } + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_memcmp_working=yes +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +( exit $ac_status ) +ac_cv_func_memcmp_working=no +fi +rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_func_memcmp_working" >&5 +echo "${ECHO_T}$ac_cv_func_memcmp_working" >&6 +test $ac_cv_func_memcmp_working = no && LIBOBJS="$LIBOBJS memcmp.$ac_objext" + + +echo "$as_me:$LINENO: checking for prototypes" >&5 +echo $ECHO_N "checking for prototypes... $ECHO_C" >&6 +if test "${cv_have_prototypes+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" +int foo(int x) { return 0; } +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +return foo(10); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + cv_have_prototypes=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +cv_have_prototypes=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $cv_have_prototypes" >&5 +echo "${ECHO_T}$cv_have_prototypes" >&6 +if test "$cv_have_prototypes" = yes; then + cat >>confdefs.h <<\_ACEOF +#define HAVE_PROTOTYPES 1 +_ACEOF + +fi + +echo "$as_me:$LINENO: checking for variable length prototypes and stdarg.h" >&5 +echo $ECHO_N "checking for variable length prototypes and stdarg.h... $ECHO_C" >&6 +if test "${cv_stdarg+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +#include "confdefs.h" + +#include +int foo(int x, ...) { + va_list va; + va_start(va, x); + va_arg(va, int); + va_arg(va, char *); + va_arg(va, double); + return 0; +} + +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif +int +main () +{ +return foo(10, "", 3.14); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + cv_stdarg=yes +else + echo "$as_me: failed program was:" >&5 +cat conftest.$ac_ext >&5 +cv_stdarg=no +fi +rm -f conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $cv_stdarg" >&5 +echo "${ECHO_T}$cv_stdarg" >&6 +if test "$cv_stdarg" = yes; then + cat >>confdefs.h <<\_ACEOF +#define HAVE_STDARG_PROTOTYPES 1 +_ACEOF + +fi + + + +ac_config_files="$ac_config_files Makefile" +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overriden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +{ + (set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. 
+ sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} | + sed ' + t clear + : clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + : end' >>confcache +if cmp -s $cache_file confcache; then :; else + if test -w $cache_file; then + test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file" + cat confcache >$cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/; +s/:*\${srcdir}:*/:/; +s/:*@srcdir@:*/:/; +s/^\([^=]*=[ ]*\):*/\1/; +s/:*$//; +s/^[^=]*=[ ]*$//; +}' +fi + +DEFS=-DHAVE_CONFIG_H + + +: ${CONFIG_STATUS=./config.status} +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF + +## --------------------- ## +## M4sh Initialization. 
## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi + +# NLS nuisances. +# Support unset when possible. +if (FOO=FOO; unset FOO) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + +(set +x; test -n "`(LANG=C; export LANG) 2>&1`") && + { $as_unset LANG || test "${LANG+set}" != set; } || + { LANG=C; export LANG; } +(set +x; test -n "`(LC_ALL=C; export LC_ALL) 2>&1`") && + { $as_unset LC_ALL || test "${LC_ALL+set}" != set; } || + { LC_ALL=C; export LC_ALL; } +(set +x; test -n "`(LC_TIME=C; export LC_TIME) 2>&1`") && + { $as_unset LC_TIME || test "${LC_TIME+set}" != set; } || + { LC_TIME=C; export LC_TIME; } +(set +x; test -n "`(LC_CTYPE=C; export LC_CTYPE) 2>&1`") && + { $as_unset LC_CTYPE || test "${LC_CTYPE+set}" != set; } || + { LC_CTYPE=C; export LC_CTYPE; } +(set +x; test -n "`(LANGUAGE=C; export LANGUAGE) 2>&1`") && + { $as_unset LANGUAGE || test "${LANGUAGE+set}" != set; } || + { LANGUAGE=C; export LANGUAGE; } +(set +x; test -n "`(LC_COLLATE=C; export LC_COLLATE) 2>&1`") && + { $as_unset LC_COLLATE || test "${LC_COLLATE+set}" != set; } || + { LC_COLLATE=C; export LC_COLLATE; } +(set +x; test -n "`(LC_NUMERIC=C; export LC_NUMERIC) 2>&1`") && + { $as_unset LC_NUMERIC || test "${LC_NUMERIC+set}" != set; } || + { LC_NUMERIC=C; export LC_NUMERIC; } +(set +x; test -n "`(LC_MESSAGES=C; export LC_MESSAGES) 2>&1`") && + { $as_unset LC_MESSAGES || test "${LC_MESSAGES+set}" != set; } || + { LC_MESSAGES=C; export LC_MESSAGES; } + + +# Name of the executable. +as_me=`(basename "$0") 2>/dev/null || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . 
: '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conftest.sh + echo "exit 0" >>conftest.sh + chmod +x conftest.sh + if (PATH=".;."; conftest.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conftest.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 +echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 +echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. 
+ exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="sed y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="sed y%*+%pp%;s%[^_$as_cr_alnum]%_%g" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH || test "${CDPATH+set}" != set || { CDPATH=$PATH_SEPARATOR; export CDPATH; } + +exec 6>&1 + +# Open the log real soon, to keep \$[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. Logging --version etc. is OK. +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX +} >&5 +cat >&5 <<_CSEOF + +This file was extended by $as_me, which was +generated by GNU Autoconf 2.53. 
Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +_CSEOF +echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 +echo >&5 +_ACEOF + +# Files that config.status was made for. +if test -n "$ac_config_files"; then + echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_headers"; then + echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_links"; then + echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_commands"; then + echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS +fi + +cat >>$CONFIG_STATUS <<\_ACEOF + +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... + + -h, --help print this help, then exit + -V, --version print version number, then exit + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Report bugs to ." +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.53, + with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" + +Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001 +Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." +srcdir=$srcdir +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If no file are specified by the user, then we need to provide default +# value. By we need to know if files were specified by the user. 
+ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "x$1" : 'x\([^=]*\)='` + ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` + shift + set dummy "$ac_option" "$ac_optarg" ${1+"$@"} + shift + ;; + -*);; + *) # This is not an option, so the user has probably given explicit + # arguments. + ac_need_defaults=false;; + esac + + case $1 in + # Handling of the options. +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + echo "running $SHELL $0 " $ac_configure_args " --no-create --no-recursion" + exec $SHELL $0 $ac_configure_args --no-create --no-recursion ;; +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + --version | --vers* | -V ) + echo "$ac_cs_version"; exit 0 ;; + --he | --h) + # Conflict between --help and --header + { { echo "$as_me:$LINENO: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + shift + CONFIG_FILES="$CONFIG_FILES $1" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + shift + CONFIG_HEADERS="$CONFIG_HEADERS $1" + ac_need_defaults=false;; + + # This is an error. + -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" ;; + + esac + shift +done + +_ACEOF + + + + + +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_config_target in $ac_config_targets +do + case "$ac_config_target" in + # Handling of arguments. 
+ "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers +fi + +# Create a temporary directory, and hook for its removal unless debugging. +$debug || +{ + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} + +# Create a (secure) tmp directory for tmp files. +: ${TMPDIR=/tmp} +{ + tmp=`(umask 077 && mktemp -d -q "$TMPDIR/csXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=$TMPDIR/cs$$-$RANDOM + (umask 077 && mkdir $tmp) +} || +{ + echo "$me: cannot create a temporary directory in $TMPDIR" >&2 + { (exit 1); exit 1; } +} + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF + +# +# CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. +# This happens for instance when ./config.status config.h +if test -n "\$CONFIG_FILES"; then + # Protect against being on the right side of a sed subst in config.status. 
+ sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g; + s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF +s,@SHELL@,$SHELL,;t t +s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t +s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t +s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t +s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t +s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t +s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t +s,@exec_prefix@,$exec_prefix,;t t +s,@prefix@,$prefix,;t t +s,@program_transform_name@,$program_transform_name,;t t +s,@bindir@,$bindir,;t t +s,@sbindir@,$sbindir,;t t +s,@libexecdir@,$libexecdir,;t t +s,@datadir@,$datadir,;t t +s,@sysconfdir@,$sysconfdir,;t t +s,@sharedstatedir@,$sharedstatedir,;t t +s,@localstatedir@,$localstatedir,;t t +s,@libdir@,$libdir,;t t +s,@includedir@,$includedir,;t t +s,@oldincludedir@,$oldincludedir,;t t +s,@infodir@,$infodir,;t t +s,@mandir@,$mandir,;t t +s,@build_alias@,$build_alias,;t t +s,@host_alias@,$host_alias,;t t +s,@target_alias@,$target_alias,;t t +s,@DEFS@,$DEFS,;t t +s,@ECHO_C@,$ECHO_C,;t t +s,@ECHO_N@,$ECHO_N,;t t +s,@ECHO_T@,$ECHO_T,;t t +s,@LIBS@,$LIBS,;t t +s,@RUBYDIR@,$RUBYDIR,;t t +s,@STATISTICS@,$STATISTICS,;t t +s,@CC@,$CC,;t t +s,@CFLAGS@,$CFLAGS,;t t +s,@LDFLAGS@,$LDFLAGS,;t t +s,@CPPFLAGS@,$CPPFLAGS,;t t +s,@ac_ct_CC@,$ac_ct_CC,;t t +s,@EXEEXT@,$EXEEXT,;t t +s,@OBJEXT@,$OBJEXT,;t t +s,@RANLIB@,$RANLIB,;t t +s,@ac_ct_RANLIB@,$ac_ct_RANLIB,;t t +s,@CPP@,$CPP,;t t +s,@ALLOCA@,$ALLOCA,;t t +s,@LIBOBJS@,$LIBOBJS,;t t +s,@@,$,;t t +CEOF + +_ACEOF + + cat >>$CONFIG_STATUS <<\_ACEOF + # Split the substitutions into bite-sized pieces for seds with + # small command number limits, like on Digital OSF/1 and HP-UX. + ac_max_sed_lines=48 + ac_sed_frag=1 # Number of current file. + ac_beg=1 # First line for current file. + ac_end=$ac_max_sed_lines # Line after last line for current file. 
+ ac_more_lines=: + ac_sed_cmds= + while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + else + sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + fi + if test ! -s $tmp/subs.frag; then + ac_more_lines=false + else + # The purpose of the label and of the branching condition is to + # speed up the sed processing (if there are no `@' at all, there + # is no need to browse any of the substitutions). + # These are the two extra sed commands mentioned above. + (echo ':t + /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" + else + ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" + fi + ac_sed_frag=`expr $ac_sed_frag + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_lines` + fi + done + if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat + fi +fi # test -n "$CONFIG_FILES" + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . 
: '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { case "$ac_dir" in + [\\/]* | ?:[\\/]* ) as_incr_dir=;; + *) as_incr_dir=.;; +esac +as_dummy="$ac_dir" +for as_mkdir_dir in `IFS='/\\'; set X $as_dummy; shift; echo "$@"`; do + case $as_mkdir_dir in + # Skip DOS drivespec + ?:) as_incr_dir=$as_mkdir_dir ;; + *) + as_incr_dir=$as_incr_dir/$as_mkdir_dir + test -d "$as_incr_dir" || + mkdir "$as_incr_dir" || + { { echo "$as_me:$LINENO: error: cannot create \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; } + ;; + esac +done; } + + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac +# Don't blindly perform a `cd "$ac_dir"/$ac_foo && pwd` since $ac_foo can be +# absolute. 
+ac_abs_builddir=`cd "$ac_dir" && cd $ac_builddir && pwd` +ac_abs_top_builddir=`cd "$ac_dir" && cd $ac_top_builddir && pwd` +ac_abs_srcdir=`cd "$ac_dir" && cd $ac_srcdir && pwd` +ac_abs_top_srcdir=`cd "$ac_dir" && cd $ac_top_srcdir && pwd` + + + + if test x"$ac_file" != x-; then + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + rm -f "$ac_file" + fi + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + configure_input= + else + configure_input="$ac_file. " + fi + configure_input=$configure_input"Generated from `echo $ac_file_in | + sed 's,.*/,,'` by configure." + + # First look for the input files in the build tree, otherwise in the + # src tree. + ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + echo $f;; + *) # Relative + if test -f "$f"; then + # Build tree + echo $f + elif test -f "$srcdir/$f"; then + # Source tree + echo $srcdir/$f + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + sed "$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s,@configure_input@,$configure_input,;t t +s,@srcdir@,$ac_srcdir,;t t +s,@abs_srcdir@,$ac_abs_srcdir,;t t +s,@top_srcdir@,$ac_top_srcdir,;t t +s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t +s,@builddir@,$ac_builddir,;t t +s,@abs_builddir@,$ac_abs_builddir,;t t +s,@top_builddir@,$ac_top_builddir,;t t 
+s,@abs_top_builddir@,$ac_abs_top_builddir,;t t +" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out + rm -f $tmp/stdin + if test x"$ac_file" != x-; then + mv $tmp/out $ac_file + else + cat $tmp/out + rm -f $tmp/out + fi + +done +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + +# +# CONFIG_HEADER section. +# + +# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where +# NAME is the cpp macro being defined and VALUE is the value it is being given. +# +# ac_d sets the value in "#define NAME VALUE" lines. +ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)' +ac_dB='[ ].*$,\1#\2' +ac_dC=' ' +ac_dD=',;t' +# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE". +ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_uB='$,\1#\2define\3' +ac_uC=' ' +ac_uD=',;t' + +for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + + # First look for the input files in the build tree, otherwise in the + # src tree. 
+ ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + echo $f;; + *) # Relative + if test -f "$f"; then + # Build tree + echo $f + elif test -f "$srcdir/$f"; then + # Source tree + echo $srcdir/$f + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } + # Remove the trailing spaces. + sed 's/[ ]*$//' $ac_file_inputs >$tmp/in + +_ACEOF + +# Transform confdefs.h into two sed scripts, `conftest.defines' and +# `conftest.undefs', that substitutes the proper values into +# config.h.in to produce config.h. The first handles `#define' +# templates, and the second `#undef' templates. +# And first: Protect against being on the right side of a sed subst in +# config.status. Protect against being in an unquoted here document +# in config.status. +rm -f conftest.defines conftest.undefs +# Using a here document instead of a string reduces the quoting nightmare. +# Putting comments in sed scripts is not portable. +# +# `end' is used to avoid that the second main sed command (meant for +# 0-ary CPP macros) applies to n-ary macro definitions. +# See the Autoconf documentation for `clear'. +cat >confdef2sed.sed <<\_ACEOF +s/[\\&,]/\\&/g +s,[\\$`],\\&,g +t clear +: clear +s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp +t end +s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp +: end +_ACEOF +# If some macros were called several times there might be several times +# the same #defines, which is useless. 
Nevertheless, we may not want to +# sort them, since we want the *last* AC-DEFINE to be honored. +uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines +sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs +rm -f confdef2sed.sed + +# This sed command replaces #undef with comments. This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. +cat >>conftest.undefs <<\_ACEOF +s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */, +_ACEOF + +# Break up conftest.defines because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS +echo ' if egrep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS +echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS +echo ' :' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.defines >/dev/null +do + # Write a limited-size here document to $tmp/defines.sed. + echo ' cat >$tmp/defines.sed <>$CONFIG_STATUS + # Speed up: don't consider the non `#define' lines. + echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. + echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS + echo 'CEOF + sed -f $tmp/defines.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail + rm -f conftest.defines + mv conftest.tail conftest.defines +done +rm -f conftest.defines +echo ' fi # egrep' >>$CONFIG_STATUS +echo >>$CONFIG_STATUS + +# Break up conftest.undefs because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #undef templates' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . 
conftest.undefs >/dev/null +do + # Write a limited-size here document to $tmp/undefs.sed. + echo ' cat >$tmp/undefs.sed <>$CONFIG_STATUS + # Speed up: don't consider the non `#undef' + echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. + echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS + echo 'CEOF + sed -f $tmp/undefs.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail + rm -f conftest.undefs + mv conftest.tail conftest.undefs +done +rm -f conftest.undefs + +cat >>$CONFIG_STATUS <<\_ACEOF + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + echo "/* Generated by configure. */" >$tmp/config.h + else + echo "/* $ac_file. Generated by configure. */" >$tmp/config.h + fi + cat $tmp/in >>$tmp/config.h + rm -f $tmp/in + if test x"$ac_file" != x-; then + if cmp -s $ac_file $tmp/config.h 2>/dev/null; then + { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 +echo "$as_me: $ac_file is unchanged" >&6;} + else + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . 
: '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { case "$ac_dir" in + [\\/]* | ?:[\\/]* ) as_incr_dir=;; + *) as_incr_dir=.;; +esac +as_dummy="$ac_dir" +for as_mkdir_dir in `IFS='/\\'; set X $as_dummy; shift; echo "$@"`; do + case $as_mkdir_dir in + # Skip DOS drivespec + ?:) as_incr_dir=$as_mkdir_dir ;; + *) + as_incr_dir=$as_incr_dir/$as_mkdir_dir + test -d "$as_incr_dir" || + mkdir "$as_incr_dir" || + { { echo "$as_me:$LINENO: error: cannot create \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; } + ;; + esac +done; } + + rm -f $ac_file + mv $tmp/config.h $ac_file + fi + else + cat $tmp/config.h + rm -f $tmp/config.h + fi +done +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + exec 5>/dev/null + $SHELL $CONFIG_STATUS || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. 
+ $ac_cs_success || { (exit 1); exit 1; } +fi + diff --git a/ext/mbstring/oniguruma/configure.in b/ext/mbstring/oniguruma/configure.in deleted file mode 100644 index 84af3fbdb8..0000000000 --- a/ext/mbstring/oniguruma/configure.in +++ /dev/null @@ -1,70 +0,0 @@ -dnl Process this file with autoconf to produce a configure script. -AC_INIT(regex.c) - -AC_CONFIG_HEADER(config.h) - -dnl default value for RUBYDIR -RUBYDIR=".." -AC_ARG_WITH(rubydir, - [ --with-rubydir=RUBYDIR specify value for RUBYDIR (default ..)], - [ RUBYDIR=$withval ]) -AC_SUBST(RUBYDIR) - -dnl default value for STATISTICS -STATISTICS="" -AC_ARG_WITH(statistics, - [ --with-statistics take matching time statistical data], - [ STATISTICS=-DREG_DEBUG_STATISTICS ]) -AC_SUBST(STATISTICS) - -dnl Checks for programs. -AC_PROG_CC -AC_PROG_RANLIB -dnl AC_PROG_INSTALL - -dnl Checks for libraries. - -dnl Checks for header files. -AC_HEADER_STDC -AC_CHECK_HEADERS(stdlib.h string.h strings.h sys/time.h unistd.h sys/times.h) - -dnl Checks for typedefs, structures, and compiler characteristics. -AC_CHECK_SIZEOF(int, 4) -AC_CHECK_SIZEOF(short, 2) -AC_CHECK_SIZEOF(long, 4) -AC_C_CONST -AC_HEADER_TIME - -dnl Checks for library functions. -AC_FUNC_ALLOCA -AC_FUNC_MEMCMP - -AC_CACHE_CHECK(for prototypes, cv_have_prototypes, - [AC_TRY_COMPILE([int foo(int x) { return 0; }], [return foo(10);], - cv_have_prototypes=yes, - cv_have_prototypes=no)]) -if test "$cv_have_prototypes" = yes; then - AC_DEFINE(HAVE_PROTOTYPES) -fi - -AC_CACHE_CHECK(for variable length prototypes and stdarg.h, cv_stdarg, - [AC_TRY_COMPILE([ -#include -int foo(int x, ...) 
{ - va_list va; - va_start(va, x); - va_arg(va, int); - va_arg(va, char *); - va_arg(va, double); - return 0; -} -], [return foo(10, "", 3.14);], - cv_stdarg=yes, - cv_stdarg=no)]) -if test "$cv_stdarg" = yes; then - AC_DEFINE(HAVE_STDARG_PROTOTYPES) -fi - -AC_SUBST() - -AC_OUTPUT(Makefile) diff --git a/ext/mbstring/oniguruma/doc/API b/ext/mbstring/oniguruma/doc/API deleted file mode 100644 index 96f53ae9b8..0000000000 --- a/ext/mbstring/oniguruma/doc/API +++ /dev/null @@ -1,279 +0,0 @@ -Oniguruma API 2003/07/04 - -declared in regex.h. - - -# int regex_init(void) - - Initialize library. - - You don't have to call it explicitly, because it is called in regex_new(). - - -# int regex_error_code_to_str(UChar* err_buf, int err_code, ...) - - Return error message string length. - - arguments - 1 err_buf: error message buffer. - (required size: REG_MAX_ERROR_MESSAGE_LEN) - 2 err_code: error code returned from other API functions. - 3 err_info (optional): error info returned from regex_new() - and regex_recompile(). - - -# int regex_new(regex_t** reg, UChar* pattern, UChar* pattern_end, - RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, - RegErrorInfo* err_info) - - Create new regex object(regex_t). - - normal return: REG_NORMAL - - arguments - 1 reg: return regex object's address. - 2 pattern: regex pattern string. - 3 pattern_end: terminate address of pattern. (pattern + pattern length) - 4 option: compile time options. - - REG_OPTION_NONE no option - REG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z' - REG_OPTION_MULTILINE '.' match with newline - REG_OPTION_IGNORECASE ignore case (case-insensitive) - REG_OPTION_EXTEND extended pattern form - REG_OPTION_FIND_LONGEST find longest match - REG_OPTION_FIND_NOT_EMPTY ignore empty match - REG_OPTION_NEGATE_SINGLELINE - clear REG_OPTION_SINGLELINE which is default on - in REG_SYNTAX_POSIX_XXX, REG_SYNTAX_PERL and REG_SYNTAX_JAVA. - REG_OPTION_CAPTURE_ONLY_NAMED_GROUP named group only captured. 
- - 5 code: character encoding. - - REGCODE_ASCII ASCII - REGCODE_UTF8 UTF-8 - REGCODE_EUCJP EUC-JP - REGCODE_SJIS Shift_JIS - REGCODE_DEFAULT ASCII - - 6 syntax: pointer to pattern syntax definition. - - REG_SYNTAX_POSIX_BASIC POSIX Basic RE - REG_SYNTAX_POSIX_EXTENDED POSIX Extended RE - REG_SYNTAX_EMACS Emacs - REG_SYNTAX_GREP grep - REG_SYNTAX_GNU_REGEX GNU regex - REG_SYNTAX_JAVA Java (Sun java.util.regex) - REG_SYNTAX_PERL Perl - REG_SYNTAX_RUBY Ruby - REG_SYNTAX_DEFAULT default (== Ruby) - regex_set_default_syntax() - - or any RegSyntaxType data pointer defined by user. - - 7 err_info: address for return optional error info. - use this value as 3rd argument of regex_error_code_to_str(). - - -# void regex_free(regex_t* reg) - - Free memory used by regex object. - - arguments - 1 reg: regex object. - - -# int regex_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end, - RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, - RegErrorInfo* err_info) - - Recompile regex object. - - normal return: REG_NORMAL - - arguments - 1 reg: regex object. - - Another arguments are same with regex_new(). - - -# int regex_search(regex_t* reg, UChar* str, UChar* end, UChar* start, - UChar* range, RegRegion* region, RegOptionType option) - - Search string and return search result and matching region. - - normal return: match position offset (i.e. p - str >= 0) - not found: REG_MISMATCH (< 0) - - arguments - 1 reg: regex object - 2 str: target string - 3 end: terminate address of target string - 4 start: search start address of target string - 5 range: search terminate address of target string - 6 region: address for return group match range info (NULL is allowed) - 7 option: search time option - - REG_OPTION_NOTBOL string head(str) isn't considered as begin of line - REG_OPTION_NOTEOL string end (end) isn't considered as end of line - REG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API. 
- - -# int regex_match(regex_t* reg, UChar* str, UChar* end, UChar* at, - RegRegion* region, RegOptionType option) - - Match string and return result and matching region. - - normal return: match length (i.e. p - at >= 0) - not match: REG_MISMATCH (< 0) - - arguments - 1 reg: regex object - 2 str: target string - 3 end: terminate address of target string - 4 at: match address of target string - 5 region: address for return group match range info (NULL is allowed) - 6 option: search time option - - REG_OPTION_NOTBOL string head(str) isn't considered as begin of line - REG_OPTION_NOTEOL string end (end) isn't considered as end of line - REG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API. - - -# RegRegion* regex_region_new(void) - - Create a region. - - -# void regex_region_free(RegRegion* region, int free_self) - - Free memory used by region. - - arguments - 1 region: target region - 2 free_self: [1: free all, 0: free memory used in region but not self] - - -# void regex_region_copy(RegRegion* to, RegRegion* from) - - Copy contents of region. - - arguments - 1 to: target region - 2 from: source region - - -# void regex_region_clear(RegRegion* region) - - Clear contents of region. - - arguments - 1 region: target region - - -# int regex_region_resize(RegRegion* region, int n) - - Resize group range area of region. - - normal return: REG_NORMAL - - arguments - 1 region: target region - 2 n: new size - - -# int regex_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end, - int** num_list) - - Return group number list of name. - Named subexp is defined by (?....). - - normal return: number of groups for the name. - (ex. /(?..)...(?..)/ ==> 2) - name not found: -1 - - arguments - 1 reg: regex object. - 2 name: subexp-name. - 3 name_end: terminate address of subexp-name. - 4 num_list: return list of group number. 
- - -# int regex_foreach_names(regex_t* reg, int (*func)(UChar*,int,int*,void*), - void* arg) - - Iterate function call for all names. - - normal return: 0 - error: func's return value. - - arguments - 1 reg: regex object. - 2 func: called function. - func(name, , , arg); - if func return non 0 value, iteration is stopped. - 3 arg: argument for func. - - -# UChar* regex_get_prev_char_head(RegCharEncoding code, UChar* start, UChar* s) - - Return previous character head address. - - arguments - 1 code: character encoding - 2 start: string address - 3 s: target address of string - - -# UChar* regex_get_left_adjust_char_head(RegCharEncoding code, - UChar* start, UChar* s) - - Return left-adjusted head address of a character. - - arguments - 1 code: character encoding - 2 start: string address - 3 s: target address of string - - -# UChar* regex_get_right_adjust_char_head(RegCharEncoding code, - UChar* start, UChar* s) - - Return right-adjusted head address of a character. - - arguments - 1 code: character encoding - 2 start: string address - 3 s: target address of string - - -# int regex_set_default_syntax(RegSyntaxType* syntax) - - Set default syntax. - - arguments - 1 syntax: pointer to pattern syntax definition. - - -# void regex_set_default_trans_table(UChar* table) - - Set default case transformation table. - - arguments - 1 table: case transformation table - - (* this function will be obsoleted in future version) - - -# int regex_end(void) - - The use of this library is finished. - - normal return: REG_NORMAL - - -# const char* regex_version(void) - - Return version string. (ex. "1.8.6") - -// END diff --git a/ext/mbstring/oniguruma/doc/RE b/ext/mbstring/oniguruma/doc/RE deleted file mode 100644 index 3527b4556f..0000000000 --- a/ext/mbstring/oniguruma/doc/RE +++ /dev/null @@ -1,224 +0,0 @@ -Oniguruma Regular Expressions 2003/07/04 - -syntax: REG_SYNTAX_RUBY (default) - - -1. Syntax elements - - \ escape - | alternation - (...) group - [...] 
character class - - -2. Characters - - \t horizontal tab (0x09) - \v vertical tab (0x0B) - \n newline (0x0A) - \r return (0x0D) - \b back space (0x08) (* in character class only) - \f form feed (0x0C) - \a bell (0x07) - \e escape (0x1B) - \nnn octal char - \xHH hexadecimal char - \x{7HHHHHHH} wide hexadecimal char - \cx control char - \C-x control char - \M-x meta (x|0x80) - \M-\C-x meta control char - - -3. Character types - - . any character (except newline) - \w word character (alphanumeric, "_" and multibyte char) - \W non-word char - \s whitespace char (\t, \n, \v, \f, \r, \x20) - \S non-whitespace char - \d digit char - \D non-digit char - - -4. Quantifier - - greedy - - ? 1 or 0 times - * 0 or more times - + 1 or more times - {n,m} at least n but not more than m times - {n,} at least n times - {n} n times - - reluctant - - ?? 1 or 0 times - *? 0 or more times - +? 1 or more times - {n,m}? at least n but not more than m times - {n,}? at least n times - - possessive (greedy and does not backtrack after repeated) - - ?+ 1 or 0 times - *+ 0 or more times - ++ 1 or more times - - -5. Anchors - - ^ beginning of the line - $ end of the line - \b word boundary - \B not word boundary - \A beginning of string - \Z end of string, or before newline at the end - \z end of string - \G previous end-of-match position - - -6. POSIX character class ([:xxxxx:], negate [:^xxxxx:]) - - alnum alphabet or digit char - alpha alphabet - ascii code value: [0 - 127] - blank \t, \x20 - cntrl - digit 0-9 - graph - lower - print - punct - space \t, \n, \v, \f, \r, \x20 - upper - xdigit 0-9, a-f, A-F - - -7. Operators in character class - - [...] group (character class in character class) - && intersection - (lowest precedence operator in character class) - - ex. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w] - - -8. Extended expressions - - (?#...) comment - (?imx-imx) option on/off - i: ignore case - m: multi-line (dot(.) 
match newline) - x: extended form - (?imx-imx:subexp) option on/off for subexp - (?:subexp) not captured - (?=subexp) look-ahead - (?!subexp) negative look-ahead - (?<=subexp) look-behind - (?<!subexp) negative look-behind - (?>subexp) don't backtrack - (?<name>subexp) define named group - (name can not include '>', ')', '\' and NUL character) - - -9. Back reference - - \n back reference by group number (n >= 1) - \k<name> back reference by group name - - -10. Subexp call ("Tanaka Akira special") - - \g<name> call by group name - \g<n> call by group number (only if 'n' is not defined as name) - - ------------------------------ -11. Original extensions - - + named group (?<name>...) - + named backref \k<name> - + subexp call \g<name>, \g<group-num> - - -12. Lacked features compare with perl 5.8.0 - - + [:word:] - + \N{name} - + \l,\u,\L,\U, \P, \X, \C - + (?{code}) - + (??{code}) - + (?(condition)yes-pat|no-pat) - - + \Q...\E (* This is effective on REG_SYNTAX_PERL and REG_SYNTAX_JAVA) - - -13. Syntax depend options - - + REG_SYNTAX_RUBY (default) - (?m): dot(.) match newline - - + REG_SYNTAX_PERL, REG_SYNTAX_JAVA - (?s): dot(.) match newline - (?m): ^ match after newline, $ match before newline - - -14. Differences with Japanized GNU regex(version 0.12) of Ruby - - + add look behind - (?<=fixed-char-length-pattern), (? match - /(?:()|())*\1\2/ =~ "" #=> fail - - /(?:\1a|())*/ =~ "a" #=> match with "" - - + Ignore case option is not effect to an octal or hexadecimal - numbered char, but it becomes effective if it appears in the char class. - This doesn't have consistency, though they are the specifications - which are the same as GNU regex of Ruby.
- - /\x61/i.match("A") # => nil - /[\x61]/i.match("A") # => match - -// END diff --git a/ext/mbstring/oniguruma/onigcmpt200.h b/ext/mbstring/oniguruma/onigcmpt200.h new file mode 100644 index 0000000000..4c029304b6 --- /dev/null +++ b/ext/mbstring/oniguruma/onigcmpt200.h @@ -0,0 +1,304 @@ +/********************************************************************** + + onigcmpt200.h - Oniguruma (regular expression library) + + Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp) + +**********************************************************************/ +#ifndef ONIGCMPT200_H +#define ONIGCMPT200_H + +/* constants */ +#define REG_MAX_ERROR_MESSAGE_LEN ONIG_MAX_ERROR_MESSAGE_LEN + +#define RegCharEncoding OnigEncoding + +#define REG_ENCODING_ASCII ONIG_ENCODING_ASCII +#define REG_ENCODING_ISO_8859_1 ONIG_ENCODING_ISO_8859_1 +#define REG_ENCODING_ISO_8859_15 ONIG_ENCODING_ISO_8859_15 +#define REG_ENCODING_UTF8 ONIG_ENCODING_UTF8 +#define REG_ENCODING_EUC_JP ONIG_ENCODING_EUC_JP +#define REG_ENCODING_SJIS ONIG_ENCODING_SJIS +#define REG_ENCODING_BIG5 ONIG_ENCODING_BIG5 +#define REG_ENCODING_UNDEF ONIG_ENCODING_UNDEF + +/* Don't use REGCODE_XXXX. 
(obsoleted) */ +#define REGCODE_UNDEF REG_ENCODING_UNDEF +#define REGCODE_ASCII REG_ENCODING_ASCII +#define REGCODE_UTF8 REG_ENCODING_UTF8 +#define REGCODE_EUCJP REG_ENCODING_EUC_JP +#define REGCODE_SJIS REG_ENCODING_SJIS + +typedef unsigned char* RegTransTableType; +#define RegOptionType OnigOptionType +#define RegDistance OnigDistance + +#define REG_OPTION_DEFAULT ONIG_OPTION_DEFAULT + +/* options */ +#define REG_OPTION_NONE ONIG_OPTION_NONE +#define REG_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE +#define REG_OPTION_MULTILINE ONIG_OPTION_MULTILINE +#define REG_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE +#define REG_OPTION_EXTEND ONIG_OPTION_EXTEND +#define REG_OPTION_FIND_LONGEST ONIG_OPTION_FIND_LONGEST +#define REG_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY +#define REG_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE +#define REG_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP +#define REG_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP +#define REG_OPTION_NOTBOL ONIG_OPTION_NOTBOL +#define REG_OPTION_NOTEOL ONIG_OPTION_NOTEOL +#define REG_OPTION_POSIX_REGION ONIG_OPTION_POSIX_REGION + +#define REG_OPTION_ON ONIG_OPTION_ON +#define REG_OPTION_OFF ONIG_OPTION_OFF +#define IS_REG_OPTION_ON ONIG_IS_OPTION_ON + +/* syntax */ +#define RegSyntaxType OnigSyntaxType + +#define RegSyntaxPosixBasic OnigSyntaxPosixBasic +#define RegSyntaxPosixExtended OnigSyntaxPosixExtended +#define RegSyntaxEmacs OnigSyntaxEmacs +#define RegSyntaxGrep OnigSyntaxGrep +#define RegSyntaxGnuRegex OnigSyntaxGnuRegex +#define RegSyntaxJava OnigSyntaxJava +#define RegSyntaxPerl OnigSyntaxPerl +#define RegSyntaxRuby OnigSyntaxRuby + +#define REG_SYNTAX_POSIX_BASIC ONIG_SYNTAX_POSIX_BASIC +#define REG_SYNTAX_POSIX_EXTENDED ONIG_SYNTAX_POSIX_EXTENDED +#define REG_SYNTAX_EMACS ONIG_SYNTAX_EMACS +#define REG_SYNTAX_GREP ONIG_SYNTAX_GREP +#define REG_SYNTAX_GNU_REGEX ONIG_SYNTAX_GNU_REGEX +#define REG_SYNTAX_JAVA ONIG_SYNTAX_JAVA +#define REG_SYNTAX_PERL ONIG_SYNTAX_PERL +#define 
REG_SYNTAX_RUBY ONIG_SYNTAX_RUBY + +#define REG_SYNTAX_DEFAULT ONIG_SYNTAX_DEFAULT +#define RegDefaultSyntax OnigDefaultSyntax + +/* syntax (operators) */ +#define REG_SYN_OP_VARIABLE_META_CHARACTERS \ + ONIG_SYN_OP_VARIABLE_META_CHARACTERS +#define REG_SYN_OP_DOT_ANYCHAR \ + ONIG_SYN_OP_DOT_ANYCHAR +#define REG_SYN_OP_ASTERISK_ZERO_INF \ + ONIG_SYN_OP_ASTERISK_ZERO_INF +#define REG_SYN_OP_ESC_ASTERISK_ZERO_INF \ + ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF +#define REG_SYN_OP_PLUS_ONE_INF \ + ONIG_SYN_OP_PLUS_ONE_INF +#define REG_SYN_OP_ESC_PLUS_ONE_INF \ + ONIG_SYN_OP_ESC_PLUS_ONE_INF +#define REG_SYN_OP_QMARK_ZERO_ONE \ + ONIG_SYN_OP_QMARK_ZERO_ONE +#define REG_SYN_OP_ESC_QMARK_ZERO_ONE \ + ONIG_SYN_OP_ESC_QMARK_ZERO_ONE +#define REG_SYN_OP_BRACE_INTERVAL \ + ONIG_SYN_OP_BRACE_INTERVAL +#define REG_SYN_OP_ESC_BRACE_INTERVAL \ + ONIG_SYN_OP_ESC_BRACE_INTERVAL +#define REG_SYN_OP_VBAR_ALT \ + ONIG_SYN_OP_VBAR_ALT +#define REG_SYN_OP_ESC_VBAR_ALT \ + ONIG_SYN_OP_ESC_VBAR_ALT +#define REG_SYN_OP_LPAREN_SUBEXP \ + ONIG_SYN_OP_LPAREN_SUBEXP +#define REG_SYN_OP_ESC_LPAREN_SUBEXP \ + ONIG_SYN_OP_ESC_LPAREN_SUBEXP +#define REG_SYN_OP_ESC_AZ_BUF_ANCHOR \ + ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR +#define REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR \ + ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR +#define REG_SYN_OP_DECIMAL_BACKREF \ + ONIG_SYN_OP_DECIMAL_BACKREF +#define REG_SYN_OP_BRACKET_CC \ + ONIG_SYN_OP_BRACKET_CC +#define REG_SYN_OP_ESC_W_WORD \ + ONIG_SYN_OP_ESC_W_WORD +#define REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END \ + ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END +#define REG_SYN_OP_ESC_B_WORD_BOUND \ + ONIG_SYN_OP_ESC_B_WORD_BOUND +#define REG_SYN_OP_ESC_S_WHITE_SPACE \ + ONIG_SYN_OP_ESC_S_WHITE_SPACE +#define REG_SYN_OP_ESC_D_DIGIT \ + ONIG_SYN_OP_ESC_D_DIGIT +#define REG_SYN_OP_LINE_ANCHOR \ + ONIG_SYN_OP_LINE_ANCHOR +#define REG_SYN_OP_POSIX_BRACKET \ + ONIG_SYN_OP_POSIX_BRACKET +#define REG_SYN_OP_QMARK_NON_GREEDY \ + ONIG_SYN_OP_QMARK_NON_GREEDY +#define REG_SYN_OP_ESC_CONTROL_CHARS \ + 
ONIG_SYN_OP_ESC_CONTROL_CHARS +#define REG_SYN_OP_ESC_C_CONTROL \ + ONIG_SYN_OP_ESC_C_CONTROL +#define REG_SYN_OP_ESC_OCTAL3 \ + ONIG_SYN_OP_ESC_OCTAL3 +#define REG_SYN_OP_ESC_X_HEX2 \ + ONIG_SYN_OP_ESC_X_HEX2 +#define REG_SYN_OP_ESC_X_BRACE_HEX8 \ + ONIG_SYN_OP_ESC_X_BRACE_HEX8 + +#define REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE \ + ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE +#define REG_SYN_OP2_QMARK_GROUP_EFFECT \ + ONIG_SYN_OP2_QMARK_GROUP_EFFECT +#define REG_SYN_OP2_OPTION_PERL \ + ONIG_SYN_OP2_OPTION_PERL +#define REG_SYN_OP2_OPTION_RUBY \ + ONIG_SYN_OP2_OPTION_RUBY +#define REG_SYN_OP2_PLUS_POSSESSIVE_REPEAT \ + ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT +#define REG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL \ + ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL +#define REG_SYN_OP2_CCLASS_SET_OP \ + ONIG_SYN_OP2_CCLASS_SET_OP +#define REG_SYN_OP2_QMARK_LT_NAMED_GROUP \ + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP +#define REG_SYN_OP2_ESC_K_NAMED_BACKREF \ + ONIG_SYN_OP2_ESC_K_NAMED_BACKREF +#define REG_SYN_OP2_ESC_G_SUBEXP_CALL \ + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL +#define REG_SYN_OP2_ATMARK_CAPTURE_HISTORY \ + ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY +#define REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL \ + ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL +#define REG_SYN_OP2_ESC_CAPITAL_M_BAR_META \ + ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META +#define REG_SYN_OP2_ESC_V_VTAB \ + ONIG_SYN_OP2_ESC_V_VTAB +#define REG_SYN_OP2_ESC_U_HEX4 \ + ONIG_SYN_OP2_ESC_U_HEX4 +#define REG_SYN_OP2_ESC_GNU_BUF_ANCHOR \ + ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR + +#define REG_SYN_CONTEXT_INDEP_ANCHORS \ + ONIG_SYN_CONTEXT_INDEP_ANCHORS +#define REG_SYN_CONTEXT_INDEP_REPEAT_OPS \ + ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS +#define REG_SYN_CONTEXT_INVALID_REPEAT_OPS \ + ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS +#define REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP \ + ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP +#define REG_SYN_ALLOW_INVALID_INTERVAL \ + ONIG_SYN_ALLOW_INVALID_INTERVAL +#define REG_SYN_STRICT_CHECK_BACKREF \ + ONIG_SYN_STRICT_CHECK_BACKREF +#define 
REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND \ + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND +#define REG_SYN_CAPTURE_ONLY_NAMED_GROUP \ + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP +#define REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME \ + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME + +#define REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC \ + ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC +#define REG_SYN_BACKSLASH_ESCAPE_IN_CC \ + ONIG_SYN_BACKSLASH_ESCAPE_IN_CC +#define REG_SYN_ALLOW_EMPTY_RANGE_IN_CC \ + ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC +#define REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC \ + ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC +#define REG_SYN_WARN_CC_OP_NOT_ESCAPED \ + ONIG_SYN_WARN_CC_OP_NOT_ESCAPED +#define REG_SYN_WARN_REDUNDANT_NESTED_REPEAT \ + ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT + +/* meta character specifiers (regex_set_meta_char()) */ +#define REG_META_CHAR_ESCAPE ONIG_META_CHAR_ESCAPE +#define REG_META_CHAR_ANYCHAR ONIG_META_CHAR_ANYCHAR +#define REG_META_CHAR_ANYTIME ONIG_META_CHAR_ANYTIME +#define REG_META_CHAR_ZERO_OR_ONE_TIME ONIG_META_CHAR_ZERO_OR_ONE_TIME +#define REG_META_CHAR_ONE_OR_MORE_TIME ONIG_META_CHAR_ONE_OR_MORE_TIME +#define REG_META_CHAR_ANYCHAR_ANYTIME ONIG_META_CHAR_ANYCHAR_ANYTIME + +#define REG_INEFFECTIVE_META_CHAR ONIG_INEFFECTIVE_META_CHAR + +/* error codes */ +#define REG_IS_PATTERN_ERROR ONIG_IS_PATTERN_ERROR +/* normal return */ +#define REG_NORMAL ONIG_NORMAL +#define REG_MISMATCH ONIG_MISMATCH +#define REG_NO_SUPPORT_CONFIG ONIG_NO_SUPPORT_CONFIG +/* internal error */ +#define REGERR_MEMORY ONIGERR_MEMORY +#define REGERR_MATCH_STACK_LIMIT_OVER ONIGERR_MATCH_STACK_LIMIT_OVER +#define REGERR_TYPE_BUG ONIGERR_TYPE_BUG +#define REGERR_PARSER_BUG ONIGERR_PARSER_BUG +#define REGERR_STACK_BUG ONIGERR_STACK_BUG +#define REGERR_UNDEFINED_BYTECODE ONIGERR_UNDEFINED_BYTECODE +#define REGERR_UNEXPECTED_BYTECODE ONIGERR_UNEXPECTED_BYTECODE +#define REGERR_DEFAULT_ENCODING_IS_NOT_SETTED \ + ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED +#define REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR \ 
+ ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR +/* general error */ +#define REGERR_INVALID_ARGUMENT ONIGERR_INVALID_ARGUMENT +/* errors related to thread */ +#define REGERR_OVER_THREAD_PASS_LIMIT_COUNT \ + ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT + + +/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ +#define REG_MAX_CAPTURE_HISTORY_GROUP ONIG_MAX_CAPTURE_HISTORY_GROUP +#define REG_IS_CAPTURE_HISTORY_GROUP ONIG_IS_CAPTURE_HISTORY_GROUP + +#define REG_REGION_NOTPOS ONIG_REGION_NOTPOS + +#define RegRegion OnigRegion +#define RegErrorInfo OnigErrorInfo +#define RegRepeatRange OnigRepeatRange + +#define RegWarnFunc OnigWarnFunc +#define regex_null_warn onig_null_warn +#define REG_NULL_WARN ONIG_NULL_WARN + +/* regex_t state */ +#define REG_STATE_NORMAL ONIG_STATE_NORMAL +#define REG_STATE_SEARCHING ONIG_STATE_SEARCHING +#define REG_STATE_COMPILING ONIG_STATE_COMPILING +#define REG_STATE_MODIFY ONIG_STATE_MODIFY + +#define REG_STATE ONIG_STATE + +/* Oniguruma Native API */ +#define regex_init onig_init +#define regex_error_code_to_str onig_error_code_to_str +#define regex_set_warn_func onig_set_warn_func +#define regex_set_verb_warn_func onig_set_verb_warn_func +#define regex_new onig_new +#define regex_free onig_free +#define regex_recompile onig_recompile +#define regex_search onig_search +#define regex_match onig_match +#define regex_region_new onig_region_new +#define regex_region_free onig_region_free +#define regex_region_copy onig_region_copy +#define regex_region_clear onig_region_clear +#define regex_region_resize onig_region_resize +#define regex_name_to_group_numbers onig_name_to_group_numbers +#define regex_name_to_backref_number onig_name_to_backref_number +#define regex_foreach_name onig_foreach_name +#define regex_number_of_names onig_number_of_names +#define regex_get_encoding onig_get_encoding +#define regex_get_options onig_get_options +#define regex_get_syntax onig_get_syntax +#define regex_set_default_syntax 
onig_set_default_syntax +#define regex_copy_syntax onig_copy_syntax +#define regex_set_meta_char onig_set_meta_char +#define regex_end onig_end +#define regex_version onig_version + +/* encoding API */ +#define enc_get_prev_char_head onigenc_get_prev_char_head +#define enc_get_left_adjust_char_head onigenc_get_left_adjust_char_head +#define enc_get_right_adjust_char_head onigenc_get_right_adjust_char_head +/* obsoleted API */ +#define regex_get_prev_char_head onigenc_get_prev_char_head +#define regex_get_left_adjust_char_head onigenc_get_left_adjust_char_head +#define regex_get_right_adjust_char_head onigenc_get_right_adjust_char_head + +#endif /* ONIGCMPT200_H */ diff --git a/ext/mbstring/oniguruma/oniggnu.h b/ext/mbstring/oniguruma/oniggnu.h new file mode 100644 index 0000000000..d78dc18b11 --- /dev/null +++ b/ext/mbstring/oniguruma/oniggnu.h @@ -0,0 +1,77 @@ +/********************************************************************** + + oniggnu.h - Oniguruma (regular expression library) + + Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp) + +**********************************************************************/ +#ifndef ONIGGNU_H +#define ONIGGNU_H + +#include "oniguruma.h" + +#define MBCTYPE_ASCII 0 +#define MBCTYPE_EUC 1 +#define MBCTYPE_SJIS 2 +#define MBCTYPE_UTF8 3 + +/* GNU regex options */ +#ifndef RE_NREGS +#define RE_NREGS ONIG_NREGION +#endif +#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE +#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND +#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE +#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE +#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST +#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) + +#ifdef RUBY_PLATFORM +#define re_mbcinit ruby_re_mbcinit +#define re_compile_pattern ruby_re_compile_pattern +#define re_recompile_pattern ruby_re_recompile_pattern +#define re_free_pattern ruby_re_free_pattern +#define re_adjust_startpos ruby_re_adjust_startpos +#define re_search 
ruby_re_search +#define re_match ruby_re_match +#define re_set_casetable ruby_re_set_casetable +#define re_copy_registers ruby_re_copy_registers +#define re_free_registers ruby_re_free_registers +#define register_info_type ruby_register_info_type +#define re_error_code_to_str ruby_error_code_to_str + +#define ruby_error_code_to_str onig_error_code_to_str +#define ruby_re_copy_registers onig_region_copy +#else +#define re_error_code_to_str onig_error_code_to_str +#define re_copy_registers onig_region_copy +#endif + +#ifdef ONIG_RUBY_M17N +ONIG_EXTERN +void re_mbcinit P_((OnigEncoding)); +#else +ONIG_EXTERN +void re_mbcinit P_((int)); +#endif + +ONIG_EXTERN +int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); +ONIG_EXTERN +int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); +ONIG_EXTERN +void re_free_pattern P_((struct re_pattern_buffer*)); +ONIG_EXTERN +int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); +ONIG_EXTERN +int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); +ONIG_EXTERN +int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*)); +ONIG_EXTERN +void re_set_casetable P_((const char*)); +ONIG_EXTERN +void re_free_registers P_((struct re_registers*)); +ONIG_EXTERN +int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ + +#endif /* ONIGGNU_H */ diff --git a/ext/mbstring/oniguruma/onigposix.h b/ext/mbstring/oniguruma/onigposix.h index ea93c6f9f7..3793ae6bd9 100644 --- a/ext/mbstring/oniguruma/onigposix.h +++ b/ext/mbstring/oniguruma/onigposix.h @@ -2,7 +2,7 @@ onigposix.h - Oniguruma (regular expression library) - Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #ifndef ONIGPOSIX_H @@ -13,7 +13,7 @@ #define REG_NEWLINE (1<<1) #define 
REG_NOTBOL (1<<2) #define REG_NOTEOL (1<<3) -#define REG_EXTENDED (1<<4) /* if not setted, Basic Regular Expression */ +#define REG_EXTENDED (1<<4) /* if not set, Basic Regular Expression */ #define REG_NOSUB (1<<5) /* POSIX error codes */ @@ -38,11 +38,10 @@ #define REG_EONIG_THREAD 17 /* character encodings (for reg_set_encoding()) */ -/* These value must be same with MBCTYPE_XXXX in oniguruma.h.*/ -#define REG_ENCODING_ASCII 0 -#define REG_ENCODING_EUC_JP 1 -#define REG_ENCODING_SJIS 2 -#define REG_ENCODING_UTF8 3 +#define REG_POSIX_ENCODING_ASCII 0 +#define REG_POSIX_ENCODING_EUC_JP 1 +#define REG_POSIX_ENCODING_SJIS 2 +#define REG_POSIX_ENCODING_UTF8 3 #include @@ -63,73 +62,75 @@ typedef struct { #ifndef P_ -#ifdef __STDC__ +#if defined(__STDC__) || defined(_WIN32) # define P_(args) args #else # define P_(args) () #endif #endif -#ifndef REG_EXTERN +#ifndef ONIG_EXTERN #if defined(_WIN32) && !defined(__CYGWIN__) #if defined(EXPORT) || defined(RUBY_EXPORT) -#define REG_EXTERN extern __declspec(dllexport) -#elif defined(IMPORT) -#define REG_EXTERN extern __declspec(dllimport) +#define ONIG_EXTERN extern __declspec(dllexport) +#else +#define ONIG_EXTERN extern __declspec(dllimport) #endif #endif #endif -#ifndef REG_EXTERN -#define REG_EXTERN extern +#ifndef ONIG_EXTERN +#define ONIG_EXTERN extern #endif #ifndef ONIGURUMA_H -typedef unsigned int RegOptionType; +typedef unsigned int OnigOptionType; /* syntax */ typedef struct { unsigned int op; unsigned int op2; unsigned int behavior; - RegOptionType options; /* default option */ -} RegSyntaxType; - -REG_EXTERN RegSyntaxType RegSyntaxPosixBasic; -REG_EXTERN RegSyntaxType RegSyntaxPosixExtended; -REG_EXTERN RegSyntaxType RegSyntaxEmacs; -REG_EXTERN RegSyntaxType RegSyntaxGrep; -REG_EXTERN RegSyntaxType RegSyntaxGnuRegex; -REG_EXTERN RegSyntaxType RegSyntaxJava; -REG_EXTERN RegSyntaxType RegSyntaxPerl; -REG_EXTERN RegSyntaxType RegSyntaxRuby; - -/* predefined syntaxes (see regcomp.c) */ -#define
REG_SYNTAX_POSIX_BASIC (&RegSyntaxPosixBasic) -#define REG_SYNTAX_POSIX_EXTENDED (&RegSyntaxPosixExtended) -#define REG_SYNTAX_EMACS (&RegSyntaxEmacs) -#define REG_SYNTAX_GREP (&RegSyntaxGrep) -#define REG_SYNTAX_GNU_REGEX (&RegSyntaxGnuRegex) -#define REG_SYNTAX_JAVA (&RegSyntaxJava) -#define REG_SYNTAX_PERL (&RegSyntaxPerl) -#define REG_SYNTAX_RUBY (&RegSyntaxRuby) + OnigOptionType options; /* default option */ +} OnigSyntaxType; + +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; +ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; +ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; + +/* predefined syntaxes (see regparse.c) */ +#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) +#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) +#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) +#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) +#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) +#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) +#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) /* default syntax */ -#define REG_SYNTAX_DEFAULT RegDefaultSyntax +#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax -REG_EXTERN RegSyntaxType* RegDefaultSyntax; +ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; -REG_EXTERN int regex_set_default_syntax P_((RegSyntaxType* syntax)); +ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); +ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); #endif /* ONIGURUMA_H */ -REG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options)); -REG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); -REG_EXTERN void regfree P_((regex_t* reg)); -REG_EXTERN size_t regerror P_((int code, const regex_t* reg, 
char* buf, size_t size)); +ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options)); +ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); +ONIG_EXTERN void regfree P_((regex_t* reg)); +ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size)); /* extended API */ -REG_EXTERN void reg_set_encoding P_((int enc)); -REG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, unsigned char* name, unsigned char* name_end, int** nums)); -REG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(unsigned char*,int,int*,void*), void* arg)); +ONIG_EXTERN void reg_set_encoding P_((int enc)); +ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, unsigned char* name, unsigned char* name_end, int** nums)); +ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*), void* arg)); +ONIG_EXTERN int reg_number_of_names P_((regex_t* reg)); #endif /* ONIGPOSIX_H */ diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h index e5236a80a7..fd9e8f1700 100644 --- a/ext/mbstring/oniguruma/oniguruma.h +++ b/ext/mbstring/oniguruma/oniguruma.h @@ -2,7 +2,7 @@ oniguruma.h - Oniguruma (regular expression library) - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #ifndef ONIGURUMA_H @@ -11,23 +11,12 @@ #include "php_compat.h" #define ONIGURUMA -#define ONIGURUMA_VERSION_MAJOR 1 -#define ONIGURUMA_VERSION_MINOR 9 -#define ONIGURUMA_VERSION_TEENY 1 - -/* config parameters */ -#ifndef RE_NREGS -#define RE_NREGS 10 -#endif -#define REG_NREGION RE_NREGS -#define REG_MAX_BACKREF_NUM 1000 -#define REG_MAX_REPEAT_NUM 100000 -#define REG_MAX_MULTI_BYTE_RANGES_NUM 1000 -/* constants */ -#define REG_MAX_ERROR_MESSAGE_LEN 90 +#define ONIGURUMA_VERSION_MAJOR 2 +#define 
ONIGURUMA_VERSION_MINOR 2 +#define ONIGURUMA_VERSION_TEENY 2 #ifndef P_ -#ifdef __STDC__ +#if defined(__STDC__) || defined(_WIN32) # define P_(args) args #else # define P_(args) () @@ -42,241 +31,543 @@ #endif #endif -#ifndef REG_EXTERN +#ifndef ONIG_EXTERN #if defined(_WIN32) && !defined(__CYGWIN__) #if defined(EXPORT) || defined(RUBY_EXPORT) -#define REG_EXTERN extern __declspec(dllexport) -#elif defined(IMPORT) -#define REG_EXTERN extern __declspec(dllimport) +#define ONIG_EXTERN extern __declspec(dllexport) +#else +#define ONIG_EXTERN extern __declspec(dllimport) #endif #endif #endif -#ifndef REG_EXTERN -#define REG_EXTERN extern +#ifndef ONIG_EXTERN +#define ONIG_EXTERN extern #endif -#define REG_CHAR_TABLE_SIZE 256 +/* PART: character encoding */ + +typedef unsigned char UChar; +typedef unsigned long OnigCodePoint; +typedef unsigned int OnigDistance; + +#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) + +typedef struct { + OnigCodePoint from; + OnigCodePoint to; +} OnigCodePointRange; + +#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16 +typedef struct { + int target_num; + int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE]; + UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE]; +} OnigEncFoldMatchInfo; -#define REGCODE_UNDEF ((RegCharEncoding )0) #if defined(RUBY_PLATFORM) && defined(M17N_H) -#define REG_RUBY_M17N -typedef m17n_encoding* RegCharEncoding; -#define REGCODE_DEFAULT REGCODE_UNDEF + +#define ONIG_RUBY_M17N +typedef m17n_encoding* OnigEncoding; + #else -typedef const char* RegCharEncoding; -#define MBCTYPE_ASCII 0 -#define MBCTYPE_EUC 1 -#define MBCTYPE_SJIS 2 -#define MBCTYPE_UTF8 3 - -#define REGCODE_ASCII REG_MBLEN_TABLE[MBCTYPE_ASCII] -#define REGCODE_UTF8 REG_MBLEN_TABLE[MBCTYPE_UTF8] -#define REGCODE_EUCJP REG_MBLEN_TABLE[MBCTYPE_EUC] -#define REGCODE_SJIS REG_MBLEN_TABLE[MBCTYPE_SJIS] -#define REGCODE_DEFAULT REGCODE_ASCII - -REG_EXTERN const char REG_MBLEN_TABLE[][REG_CHAR_TABLE_SIZE]; + +typedef struct { + const char 
len_table[256]; + const char* name; + int max_enc_len; + int is_fold_match; + int ctype_support_level; /* sb-only/full */ + int is_continuous_sb_mb; /* code point is continuous from sb to mb */ + OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end); + int (*code_to_mbclen)(OnigCodePoint code); + int (*code_to_mbc)(OnigCodePoint code, UChar *buf); + int (*mbc_to_lower)(UChar* p, UChar* lower); + int (*mbc_is_case_ambig)(UChar* p); + int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype); + int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]); + UChar* (*left_adjust_char_head)(UChar* start, UChar* s); + int (*is_allowed_reverse_match)(UChar* p, UChar* e); + int (*get_all_fold_match_code)(OnigCodePoint** codes); + int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info); +} OnigEncodingType; + +typedef OnigEncodingType* OnigEncoding; + +ONIG_EXTERN OnigEncodingType OnigEncodingASCII; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF8; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR; 
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN; +ONIG_EXTERN OnigEncodingType OnigEncodingSJIS; +ONIG_EXTERN OnigEncodingType OnigEncodingKOI8; +ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R; +ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; + +#define ONIG_ENCODING_ASCII (&OnigEncodingASCII) +#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1) +#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2) +#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3) +#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4) +#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5) +#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6) +#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7) +#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8) +#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9) +#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10) +#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11) +#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13) +#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14) +#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15) +#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16) +#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8) +#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP) +#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW) +#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR) +#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN) +#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS) +#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) +#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R) +#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5) + #endif /* else RUBY && M17N */ -REG_EXTERN RegCharEncoding RegDefaultCharEncoding; +#define ONIG_ENCODING_UNDEF ((OnigEncoding )0) + + +/* work size */ +#define ONIGENC_CODE_TO_MBC_MAXLEN 7 +#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN + +/* character types */ +#define ONIGENC_CTYPE_ALPHA (1<< 0) +#define 
ONIGENC_CTYPE_BLANK (1<< 1) +#define ONIGENC_CTYPE_CNTRL (1<< 2) +#define ONIGENC_CTYPE_DIGIT (1<< 3) +#define ONIGENC_CTYPE_GRAPH (1<< 4) +#define ONIGENC_CTYPE_LOWER (1<< 5) +#define ONIGENC_CTYPE_PRINT (1<< 6) +#define ONIGENC_CTYPE_PUNCT (1<< 7) +#define ONIGENC_CTYPE_SPACE (1<< 8) +#define ONIGENC_CTYPE_UPPER (1<< 9) +#define ONIGENC_CTYPE_XDIGIT (1<<10) +#define ONIGENC_CTYPE_WORD (1<<11) +#define ONIGENC_CTYPE_ASCII (1<<12) +#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT) + +/* ctype support level */ +#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0 +#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1 + + +#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte) + +#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) +#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) +#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1) +#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) +#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) +#define ONIGENC_IS_CODE_SB_WORD(enc,code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) +#define ONIGENC_IS_MBC_WORD(enc,s,end) \ + ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) + + +#ifdef ONIG_RUBY_M17N + +#include /* for isblank(), isgraph() */ + +#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf) +#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p) + +#define ONIGENC_IS_FOLD_MATCH(enc) FALSE +#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE +#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB +#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ + onigenc_is_allowed_reverse_match(enc, s, end) +#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ + onigenc_get_left_adjust_char_head(enc, start, s) +#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0 +#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \ 
+ ONIG_NO_SUPPORT_CONFIG +#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b) +#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc) +#define ONIGENC_MBC_MAXLEN_DIST(enc) \ + (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \ + : ONIG_INFINITE_DISTANCE) +#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e)) +#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code)) +#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf) + +#if 0 +#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */ +#endif + +#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \ + onigenc_is_code_ctype(enc,code,ctype) -#if defined(RUBY_PLATFORM) && !defined(M17N_H) -#undef ismbchar -#define ismbchar(c) (mbclen((c)) != 1) -#define mbclen(c) RegDefaultCharEncoding[(unsigned char )(c)] +#ifdef isblank +# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code) +#else +# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t') +#endif +#ifdef isgraph +# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code) +#else +# define ONIGENC_IS_CODE_GRAPH(enc,code) \ + (isprint((int )code) && !isspace((int )code)) #endif -typedef unsigned int RegOptionType; -typedef unsigned char* RegTransTableType; -typedef unsigned int RegDistance; -typedef unsigned char UChar; +#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code) +#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code) +#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code) +#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code) +#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code) +#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code) +#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code) +#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code) +#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code) +#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code) +#define 
ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code) + +ONIG_EXTERN +int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype)); +ONIG_EXTERN +int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN +int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf)); +ONIG_EXTERN +int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p)); +ONIG_EXTERN +int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)); + +#else /* ONIG_RUBY_M17N */ + +#define ONIGENC_NAME(enc) ((enc)->name) + +#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf) +#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p) + +#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match) +#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb) +#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level) +#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ + (enc)->is_allowed_reverse_match(s,end) +#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ + (enc)->left_adjust_char_head(start, s) +#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \ + (enc)->get_all_fold_match_code(codes) +#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \ + (enc)->get_fold_match_info(p,end,info) +#define ONIGENC_STEP_BACK(enc,start,s,n) \ + onigenc_step_back((enc),(start),(s),(n)) + +#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(int )(byte)]) +#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) +#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) +#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e)) +#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code) +#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf) + +#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype) + +#define ONIGENC_IS_CODE_GRAPH(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) +#define 
ONIGENC_IS_CODE_PRINT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT) +#define ONIGENC_IS_CODE_ALNUM(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM) +#define ONIGENC_IS_CODE_ALPHA(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA) +#define ONIGENC_IS_CODE_LOWER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER) +#define ONIGENC_IS_CODE_UPPER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER) +#define ONIGENC_IS_CODE_CNTRL(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL) +#define ONIGENC_IS_CODE_PUNCT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT) +#define ONIGENC_IS_CODE_SPACE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE) +#define ONIGENC_IS_CODE_BLANK(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK) +#define ONIGENC_IS_CODE_DIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT) +#define ONIGENC_IS_CODE_XDIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT) +#define ONIGENC_IS_CODE_WORD(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) + +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \ + (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr) + +ONIG_EXTERN +UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n)); + +#endif /* is not ONIG_RUBY_M17N */ + + +/* encoding API */ +ONIG_EXTERN +int onigenc_init P_(()); +ONIG_EXTERN +int onigenc_set_default_encoding P_((OnigEncoding enc)); +ONIG_EXTERN +OnigEncoding onigenc_get_default_encoding P_(()); +ONIG_EXTERN +void onigenc_set_default_caseconv_table P_((UChar* table)); +ONIG_EXTERN +UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev)); +ONIG_EXTERN +UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); +ONIG_EXTERN +UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, 
UChar* s)); +ONIG_EXTERN +UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); + + + +/* PART: regular expression */ + +/* config parameters */ +#define ONIG_NREGION 10 +#define ONIG_MAX_BACKREF_NUM 1000 +#define ONIG_MAX_REPEAT_NUM 100000 +#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 1000 +/* constants */ +#define ONIG_MAX_ERROR_MESSAGE_LEN 90 + +#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N) +ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; +#undef ismbchar +#define ismbchar(c) (mbclen((c)) != 1) +#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)]) +#endif -#define REG_OPTION_DEFAULT REG_OPTION_NONE +typedef unsigned int OnigOptionType; -/* GNU regex options */ -#define RE_OPTION_IGNORECASE (1L) -#define RE_OPTION_EXTENDED (RE_OPTION_IGNORECASE << 1) -#define RE_OPTION_MULTILINE (RE_OPTION_EXTENDED << 1) -#define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE << 1) -#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) -#define RE_OPTION_LONGEST (RE_OPTION_SINGLELINE << 1) +#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE /* options */ -#define REG_OPTION_NONE 0 -#define REG_OPTION_SINGLELINE RE_OPTION_SINGLELINE -#define REG_OPTION_MULTILINE RE_OPTION_MULTILINE -#define REG_OPTION_IGNORECASE RE_OPTION_IGNORECASE -#define REG_OPTION_EXTEND RE_OPTION_EXTENDED -#define REG_OPTION_FIND_LONGEST RE_OPTION_LONGEST -#define REG_OPTION_FIND_NOT_EMPTY (REG_OPTION_FIND_LONGEST << 1) -#define REG_OPTION_NEGATE_SINGLELINE (REG_OPTION_FIND_NOT_EMPTY << 1) -#define REG_OPTION_CAPTURE_ONLY_NAMED_GROUP (REG_OPTION_NEGATE_SINGLELINE << 1) +#define ONIG_OPTION_NONE 0 +#define ONIG_OPTION_IGNORECASE 1L +#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) +#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) +#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) +#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) +#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) 
+#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) +#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) +#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) /* options (search time) */ -#define REG_OPTION_NOTBOL (REG_OPTION_CAPTURE_ONLY_NAMED_GROUP << 1) -#define REG_OPTION_NOTEOL (REG_OPTION_NOTBOL << 1) -#define REG_OPTION_POSIX_REGION (REG_OPTION_NOTEOL << 1) +#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) +#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) +#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) -#define REG_OPTION_ON(options,regopt) ((options) |= (regopt)) -#define REG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) -#define IS_REG_OPTION_ON(options,option) ((options) & (option)) +#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) +#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) +#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) /* syntax */ typedef struct { unsigned int op; unsigned int op2; unsigned int behavior; - RegOptionType options; /* default option */ -} RegSyntaxType; - -REG_EXTERN RegSyntaxType RegSyntaxPosixBasic; -REG_EXTERN RegSyntaxType RegSyntaxPosixExtended; -REG_EXTERN RegSyntaxType RegSyntaxEmacs; -REG_EXTERN RegSyntaxType RegSyntaxGrep; -REG_EXTERN RegSyntaxType RegSyntaxGnuRegex; -REG_EXTERN RegSyntaxType RegSyntaxJava; -REG_EXTERN RegSyntaxType RegSyntaxPerl; -REG_EXTERN RegSyntaxType RegSyntaxRuby; - -/* predefined syntaxes (see regcomp.c) */ -#define REG_SYNTAX_POSIX_BASIC (&RegSyntaxPosixBasic) -#define REG_SYNTAX_POSIX_EXTENDED (&RegSyntaxPosixExtended) -#define REG_SYNTAX_EMACS (&RegSyntaxEmacs) -#define REG_SYNTAX_GREP (&RegSyntaxGrep) -#define REG_SYNTAX_GNU_REGEX (&RegSyntaxGnuRegex) -#define REG_SYNTAX_JAVA (&RegSyntaxJava) -#define REG_SYNTAX_PERL (&RegSyntaxPerl) -#define REG_SYNTAX_RUBY (&RegSyntaxRuby) + OnigOptionType options; /* default option */ +} OnigSyntaxType; + 
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; +ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; +ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; + +/* predefined syntaxes (see regparse.c) */ +#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) +#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) +#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) +#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) +#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) +#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) +#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) /* default syntax */ -#define REG_SYNTAX_DEFAULT RegDefaultSyntax - -REG_EXTERN RegSyntaxType* RegDefaultSyntax; +ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; +#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax /* syntax (operators) */ -#define REG_SYN_OP_ANYCHAR 1 /* . */ -#define REG_SYN_OP_0INF (1<<1) /* * */ -#define REG_SYN_OP_ESC_0INF (1<<2) -#define REG_SYN_OP_1INF (1<<3) /* + */ -#define REG_SYN_OP_ESC_1INF (1<<4) -#define REG_SYN_OP_01 (1<<5) /* ? */ -#define REG_SYN_OP_ESC_01 (1<<6) -#define REG_SYN_OP_INTERVAL (1<<7) /* {lower,upper} */ -#define REG_SYN_OP_ESC_INTERVAL (1<<8) -#define REG_SYN_OP_ALT (1<<9) /* | */ -#define REG_SYN_OP_ESC_ALT (1<<10) -#define REG_SYN_OP_SUBEXP (1<<11) /* (...) */ -#define REG_SYN_OP_ESC_SUBEXP (1<<12) -#define REG_SYN_OP_ESC_BUF_ANCHOR (1<<13) /* \A, \Z, \z */ -#define REG_SYN_OP_ESC_GNU_BUF_ANCHOR (1<<14) /* \`, \' */ -#define REG_SYN_OP_BACK_REF (1<<15) /* \num */ -#define REG_SYN_OP_CC (1<<16) /* [...] */ -#define REG_SYN_OP_ESC_WORD (1<<17) /* \w, \W */ -#define REG_SYN_OP_ESC_WORD_BEGIN_END (1<<18) /* \<. 
\> */ -#define REG_SYN_OP_ESC_WORD_BOUND (1<<19) /* \b, \B */ -#define REG_SYN_OP_ESC_WHITE_SPACE (1<<20) /* \s, \S */ -#define REG_SYN_OP_ESC_DIGIT (1<<21) /* \d, \D */ -#define REG_SYN_OP_LINE_ANCHOR (1<<22) /* ^, $ */ -#define REG_SYN_OP_POSIX_BRACKET (1<<23) /* [:xxxx:] */ -#define REG_SYN_OP_NON_GREEDY (1<<24) /* ??,*?,+?,{n,m}? */ -#define REG_SYN_OP_ESC_CONTROL_CHAR (1<<25) /* \n,\r,\t,\a ... */ -#define REG_SYN_OP_ESC_C_CONTROL (1<<26) /* \cx */ -#define REG_SYN_OP_ESC_OCTAL3 (1<<27) /* \OOO */ -#define REG_SYN_OP_ESC_X_HEX2 (1<<28) /* \xHH */ -#define REG_SYN_OP_ESC_X_BRACE_HEX8 (1<<29) /* \x{7HHHHHHH} */ -#define REG_SYN_OP_SUBEXP_EFFECT (1<<30) /* (?...) */ -#define REG_SYN_OP_QUOTE (1<<31) /* \Q...\E */ - -#define REG_SYN_OP2_OPTION_PERL (1<<0) /* (?imsx), (?-imsx) */ -#define REG_SYN_OP2_OPTION_RUBY (1<<1) /* (?imx), (?-imx) */ -#define REG_SYN_OP2_POSSESSIVE_REPEAT (1<<2) /* ?+,*+,++ */ -#define REG_SYN_OP2_POSSESSIVE_INTERVAL (1<<3) /* {n,m}+ */ -#define REG_SYN_OP2_CCLASS_SET (1<<4) /* [...&&..[..].] */ -#define REG_SYN_OP2_NAMED_SUBEXP (1<<5) /*(?.),\k*/ -#define REG_SYN_OP2_SUBEXP_CALL (1<<6) /* \g */ -#define REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<7) /* \C-x */ -#define REG_SYN_OP2_ESC_M_BAR_META (1<<8) /* \M-x */ -#define REG_SYN_OP2_ESC_V_VTAB (1<<9) /* \v as VTAB */ -#define REG_SYN_OP2_ESC_U_HEX4 (1<<10) /* \uHHHH */ +#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1<<0) +#define ONIG_SYN_OP_DOT_ANYCHAR (1<<1) /* . */ +#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1<<2) /* * */ +#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1<<3) +#define ONIG_SYN_OP_PLUS_ONE_INF (1<<4) /* + */ +#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1<<5) +#define ONIG_SYN_OP_QMARK_ZERO_ONE (1<<6) /* ? 
*/ +#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1<<7) +#define ONIG_SYN_OP_BRACE_INTERVAL (1<<8) /* {lower,upper} */ +#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1<<9) /* \{lower,upper\} */ +#define ONIG_SYN_OP_VBAR_ALT (1<<10) /* | */ +#define ONIG_SYN_OP_ESC_VBAR_ALT (1<<11) /* \| */ +#define ONIG_SYN_OP_LPAREN_SUBEXP (1<<12) /* (...) */ +#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1<<13) /* \(...\) */ +#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1<<14) /* \A, \Z, \z */ +#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1<<15) /* \G */ +#define ONIG_SYN_OP_DECIMAL_BACKREF (1<<16) /* \num */ +#define ONIG_SYN_OP_BRACKET_CC (1<<17) /* [...] */ +#define ONIG_SYN_OP_ESC_W_WORD (1<<18) /* \w, \W */ +#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1<<19) /* \<. \> */ +#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1<<20) /* \b, \B */ +#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1<<21) /* \s, \S */ +#define ONIG_SYN_OP_ESC_D_DIGIT (1<<22) /* \d, \D */ +#define ONIG_SYN_OP_LINE_ANCHOR (1<<23) /* ^, $ */ +#define ONIG_SYN_OP_POSIX_BRACKET (1<<24) /* [:xxxx:] */ +#define ONIG_SYN_OP_QMARK_NON_GREEDY (1<<25) /* ??,*?,+?,{n,m}? */ +#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1<<26) /* \n,\r,\t,\a ... */ +#define ONIG_SYN_OP_ESC_C_CONTROL (1<<27) /* \cx */ +#define ONIG_SYN_OP_ESC_OCTAL3 (1<<28) /* \OOO */ +#define ONIG_SYN_OP_ESC_X_HEX2 (1<<29) /* \xHH */ +#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1<<30) /* \x{7HHHHHHH} */ + +#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1<<0) /* \Q...\E */ +#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1<<1) /* (?...) */ +#define ONIG_SYN_OP2_OPTION_PERL (1<<2) /* (?imsx),(?-imsx) */ +#define ONIG_SYN_OP2_OPTION_RUBY (1<<3) /* (?imx), (?-imx) */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1<<4) /* ?+,*+,++ */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1<<5) /* {n,m}+ */ +#define ONIG_SYN_OP2_CCLASS_SET_OP (1<<6) /* [...&&..[..]..] */ +#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1<<7) /* (?<name>...)
*/ +#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1<<8) /* \k<name> */ +#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1<<9) /* \g<name>, \g<n> */ +#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1<<10) /* (?@..),(?@<x>..) */ +#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<11) /* \C-x */ +#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1<<12) /* \M-x */ +#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */ +#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */ +#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */ +#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */ /* syntax (behavior) */ -#define REG_SYN_CONTEXT_INDEP_ANCHORS (1<<0) /* not implemented */ -#define REG_SYN_CONTEXT_INDEP_OPS (1<<1) /* ?, *, +, {n,m} */ -#define REG_SYN_CONTEXT_INVALID_OPS (1<<2) /* error or ignore */ -#define REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<3) /* ...)... */ -#define REG_SYN_ALLOW_INVALID_INTERVAL (1<<4) /* {??? */ -#define REG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ etc.*/ -#define REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */ - -/* syntax in char class [...] */ -#define REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED (1<<10) /* [,-,] */ -#define REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<11) -#define REG_SYN_ESCAPE_IN_CC (1<<12) /* [...\w..] etc.. */ -#define REG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<13) -#define REG_SYN_ALLOW_RANGE_OP_IN_CC (1<<14) /* [0-9-a] */ - +#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */ +#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1<<0) /* ?, *, +, {n,m} */ +#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1<<1) /* error or ignore */ +#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<2) /* ...)... */ +#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1<<3) /* {???
*/ +#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1<<4) /* {,n} => {0,n} */ +#define ONIG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ ..*/ +#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */ +#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */ +#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */ + +/* syntax (behavior) in char class [...] */ +#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */ +#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1<<21) /* [..\w..] etc.. */ +#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<22) +#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1<<23) /* [0-9-a]=[0-9\-a] */ +/* syntax (behavior) warning */ +#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1<<24) /* [,-,] */ +#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1<<25) /* (?:a*)+ */ + +/* meta character specifiers (onig_set_meta_char()) */ +#define ONIG_META_CHAR_ESCAPE 0 +#define ONIG_META_CHAR_ANYCHAR 1 +#define ONIG_META_CHAR_ANYTIME 2 +#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3 +#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4 +#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5 + +#define ONIG_INEFFECTIVE_META_CHAR 0 /* error codes */ -#define REG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -300) +#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) /* normal return */ -#define REG_NORMAL 0 -#define REG_MISMATCH -1 -#define REG_NO_SUPPORT_CONFIG -2 +#define ONIG_NORMAL 0 +#define ONIG_MISMATCH -1 +#define ONIG_NO_SUPPORT_CONFIG -2 /* internal error */ -#define REGERR_MEMORY -5 -#define REGERR_MATCH_STACK_LIMIT_OVER -6 -#define REGERR_TYPE_BUG -10 -#define REGERR_PARSER_BUG -11 -#define REGERR_STACK_BUG -12 -#define REGERR_UNDEFINED_BYTECODE -13 -#define REGERR_UNEXPECTED_BYTECODE -14 -#define REGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 -#define REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 +#define ONIGERR_PARSER_BUG -11 +#define ONIGERR_STACK_BUG -12 +#define ONIGERR_UNDEFINED_BYTECODE -13 +#define
ONIGERR_UNEXPECTED_BYTECODE -14 +#define ONIGERR_MATCH_STACK_LIMIT_OVER -15 +#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 +#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 +/* general error */ +#define ONIGERR_INVALID_ARGUMENT -30 /* syntax error */ -#define REGERR_END_PATTERN_AT_LEFT_BRACE -100 -#define REGERR_END_PATTERN_AT_LEFT_BRACKET -101 -#define REGERR_EMPTY_CHAR_CLASS -102 -#define REGERR_PREMATURE_END_OF_CHAR_CLASS -103 -#define REGERR_END_PATTERN_AT_BACKSLASH -104 -#define REGERR_END_PATTERN_AT_META -105 -#define REGERR_END_PATTERN_AT_CONTROL -106 -#define REGERR_META_CODE_SYNTAX -108 -#define REGERR_CONTROL_CODE_SYNTAX -109 -#define REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 -#define REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 -#define REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 -#define REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 -#define REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 -#define REGERR_NESTED_REPEAT_OPERATOR -115 -#define REGERR_UNMATCHED_CLOSE_PARENTHESIS -116 -#define REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 -#define REGERR_END_PATTERN_IN_GROUP -118 -#define REGERR_UNDEFINED_GROUP_OPTION -119 -#define REGERR_INVALID_POSIX_BRACKET_TYPE -121 -#define REGERR_INVALID_LOOK_BEHIND_PATTERN -122 -#define REGERR_INVALID_REPEAT_RANGE_PATTERN -123 +#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100 +#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 +#define ONIGERR_EMPTY_CHAR_CLASS -102 +#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 +#define ONIGERR_END_PATTERN_AT_BACKSLASH -104 +#define ONIGERR_END_PATTERN_AT_META -105 +#define ONIGERR_END_PATTERN_AT_CONTROL -106 +#define ONIGERR_META_CODE_SYNTAX -108 +#define ONIGERR_CONTROL_CODE_SYNTAX -109 +#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 +#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 +#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 +#define 
ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 +#define ONIGERR_NESTED_REPEAT_OPERATOR -115 +#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116 +#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 +#define ONIGERR_END_PATTERN_IN_GROUP -118 +#define ONIGERR_UNDEFINED_GROUP_OPTION -119 +#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121 +#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122 +#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123 /* values error (syntax error) */ -#define REGERR_TOO_BIG_NUMBER -200 -#define REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 -#define REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 -#define REGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 -#define REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 -#define REGERR_TOO_MANY_MULTI_BYTE_RANGES -205 -#define REGERR_TOO_SHORT_MULTI_BYTE_STRING -206 -#define REGERR_TOO_BIG_BACKREF_NUMBER -207 -#define REGERR_INVALID_BACKREF -208 -#define REGERR_TOO_BIG_WIDE_CHAR_VALUE -209 -#define REGERR_TOO_LONG_WIDE_CHAR_VALUE -210 -#define REGERR_INVALID_WIDE_CHAR_VALUE -211 -#define REGERR_INVALID_SUBEXP_NAME -212 -#define REGERR_UNDEFINED_NAME_REFERENCE -213 -#define REGERR_UNDEFINED_GROUP_REFERENCE -214 -#define REGERR_MULTIPLEX_DEFINITION_NAME_CALL -215 -#define REGERR_NEVER_ENDING_RECURSION -216 +#define ONIGERR_TOO_BIG_NUMBER -200 +#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 +#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 +#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 +#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 +#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205 +#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206 +#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207 +#define ONIGERR_INVALID_BACKREF -208 +#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209 +#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212 +#define ONIGERR_EMPTY_GROUP_NAME -214 +#define ONIGERR_INVALID_GROUP_NAME -215 +#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216 +#define ONIGERR_UNDEFINED_NAME_REFERENCE -217 
+#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218 +#define ONIGERR_MULTIPLEX_DEFINED_NAME -219 +#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220 +#define ONIGERR_NEVER_ENDING_RECURSION -221 +#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 +#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 /* errors related to thread */ -#define REGERR_OVER_THREAD_PASS_LIMIT_COUNT -301 +#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 + +/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ +#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 +#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ + ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) /* match result region type */ struct re_registers { @@ -284,164 +575,143 @@ struct re_registers { int num_regs; int* beg; int* end; + /* extended */ + struct re_registers** list; /* capture history. list[1]-list[31] */ }; -#define REG_REGION_NOTPOS -1 +#define ONIG_REGION_NOTPOS -1 -typedef struct re_registers RegRegion; +typedef struct re_registers OnigRegion; typedef struct { UChar* par; UChar* par_end; -} RegErrorInfo; +} OnigErrorInfo; typedef struct { int lower; int upper; -} RegRepeatRange; +} OnigRepeatRange; + +typedef void (*OnigWarnFunc) P_((char* s)); +extern void onig_null_warn P_((char* s)); +#define ONIG_NULL_WARN onig_null_warn + +#define ONIG_CHAR_TABLE_SIZE 256 /* regex_t state */ -#define REG_STATE_NORMAL 0 -#define REG_STATE_SEARCHING 1 -#define REG_STATE_COMPILING -1 -#define REG_STATE_MODIFY -2 +#define ONIG_STATE_NORMAL 0 +#define ONIG_STATE_SEARCHING 1 +#define ONIG_STATE_COMPILING -1 +#define ONIG_STATE_MODIFY -2 -#define REG_STATE(regex) \ - ((regex)->state > 0 ? REG_STATE_SEARCHING : (regex)->state) +#define ONIG_STATE(reg) \ + ((reg)->state > 0 ? 
ONIG_STATE_SEARCHING : (reg)->state) typedef struct re_pattern_buffer { - /* common members in BBuf(bytes-buffer) type */ + /* common members of BBuf(bytes-buffer) */ unsigned char* p; /* compiled pattern */ unsigned int used; /* used space for p */ unsigned int alloc; /* allocated space for p */ - int state; /* normal, searching, compiling */ - int num_mem; /* used memory(...) num counted from 1 */ - int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ - int num_null_check; /* OP_NULL_CHECK_START/END id counter */ - int num_call; /* number of subexp call */ - unsigned int backtrack_mem; + int state; /* normal, searching, compiling */ + int num_mem; /* used memory(...) num counted from 1 */ + int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ + int num_null_check; /* OP_NULL_CHECK_START/END id counter */ + int num_call; /* number of subexp call */ + unsigned int capture_history; /* (?@...) flag (1-31) */ + unsigned int bt_mem_start; /* need backtrack flag */ + unsigned int bt_mem_end; /* need backtrack flag */ int stack_pop_level; int repeat_range_alloc; - RegRepeatRange* repeat_range; + OnigRepeatRange* repeat_range; - RegCharEncoding enc; - RegOptionType options; - RegSyntaxType* syntax; + OnigEncoding enc; + OnigOptionType options; + OnigSyntaxType* syntax; void* name_table; - /* optimize info (string search and char-map and anchor) */ + /* optimization info (string search, char-map and anchors) */ int optimize; /* optimize flag */ int threshold_len; /* search str-length for apply optimize */ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ - RegDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ - RegDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ int sub_anchor; /* start-anchor for exact or map */ unsigned char *exact; unsigned char *exact_end; - unsigned char map[REG_CHAR_TABLE_SIZE]; /* used as BM 
skip or char-map */ + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ int *int_map; /* BM skip for exact_len > 255 */ int *int_map_backward; /* BM skip for backward search */ - RegDistance dmin; /* min-distance of exact or map */ - RegDistance dmax; /* max-distance of exact or map */ + OnigDistance dmin; /* min-distance of exact or map */ + OnigDistance dmax; /* max-distance of exact or map */ /* regex_t link chain */ - struct re_pattern_buffer* chain; /* escape compile-conflict on multi-thread */ + struct re_pattern_buffer* chain; /* escape compile-conflict */ } regex_t; -#ifdef RUBY_PLATFORM -#define re_mbcinit ruby_re_mbcinit -#define re_compile_pattern ruby_re_compile_pattern -#define re_recompile_pattern ruby_re_recompile_pattern -#define re_free_pattern ruby_re_free_pattern -#define re_adjust_startpos ruby_re_adjust_startpos -#define re_search ruby_re_search -#define re_match ruby_re_match -#define re_set_casetable ruby_re_set_casetable -#define re_copy_registers ruby_re_copy_registers -#define re_free_registers ruby_re_free_registers -#define register_info_type ruby_register_info_type -#define re_error_code_to_str ruby_error_code_to_str - -#define ruby_error_code_to_str regex_error_code_to_str -#define ruby_re_copy_registers regex_region_copy -#else -#define re_error_code_to_str regex_error_code_to_str -#define re_copy_registers regex_region_copy -#endif /* Oniguruma Native API */ -REG_EXTERN -int regex_init P_((void)); -REG_EXTERN -int regex_error_code_to_str PV_((UChar* s, int err_code, ...)); -REG_EXTERN -int regex_new P_((regex_t**, UChar* pattern, UChar* pattern_end, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, RegErrorInfo* einfo)); -REG_EXTERN -void regex_free P_((regex_t*)); -REG_EXTERN -int regex_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, RegErrorInfo* einfo)); -REG_EXTERN -int regex_search P_((regex_t*, UChar* str, 
UChar* end, UChar* start, UChar* range, RegRegion* region, RegOptionType option)); -REG_EXTERN -int regex_match P_((regex_t*, UChar* str, UChar* end, UChar* at, RegRegion* region, RegOptionType option)); -REG_EXTERN -RegRegion* regex_region_new P_((void)); -REG_EXTERN -void regex_region_free P_((RegRegion* region, int free_self)); -REG_EXTERN -void regex_region_copy P_((RegRegion* to, RegRegion* from)); -REG_EXTERN -void regex_region_clear P_((RegRegion* region)); -REG_EXTERN -int regex_region_resize P_((RegRegion* region, int n)); -REG_EXTERN -int regex_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end, +ONIG_EXTERN +int onig_init P_((void)); +ONIG_EXTERN +int onig_error_code_to_str PV_((UChar* s, int err_code, ...)); +ONIG_EXTERN +void onig_set_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +void onig_set_verb_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +void onig_free P_((regex_t*)); +ONIG_EXTERN +int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +OnigRegion* onig_region_new P_((void)); +ONIG_EXTERN +void onig_region_free P_((OnigRegion* region, int free_self)); +ONIG_EXTERN +void onig_region_copy P_((OnigRegion* to, OnigRegion* from)); +ONIG_EXTERN +void onig_region_clear P_((OnigRegion* region)); +ONIG_EXTERN +int onig_region_resize P_((OnigRegion* region, int n)); +ONIG_EXTERN +int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end, int** nums)); -REG_EXTERN -int 
regex_foreach_name P_((regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg)); -REG_EXTERN -UChar* regex_get_prev_char_head P_((RegCharEncoding code, UChar* start, UChar* s)); -REG_EXTERN -UChar* regex_get_left_adjust_char_head P_((RegCharEncoding code, UChar* start, UChar* s)); -REG_EXTERN -UChar* regex_get_right_adjust_char_head P_((RegCharEncoding code, UChar* start, UChar* s)); -REG_EXTERN -void regex_set_default_trans_table P_((UChar* table)); -REG_EXTERN -int regex_set_default_syntax P_((RegSyntaxType* syntax)); -REG_EXTERN -int regex_end P_((void)); -REG_EXTERN -const char* regex_version P_((void)); - - -/* GNU regex API */ -#ifdef REG_RUBY_M17N -REG_EXTERN -void re_mbcinit P_((RegCharEncoding)); -#else -REG_EXTERN -void re_mbcinit P_((int)); -#endif - -REG_EXTERN -int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); -REG_EXTERN -int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); -REG_EXTERN -void re_free_pattern P_((struct re_pattern_buffer*)); -REG_EXTERN -int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); -REG_EXTERN -int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); -REG_EXTERN -int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*)); -REG_EXTERN -void re_set_casetable P_((const char*)); -REG_EXTERN -void re_free_registers P_((struct re_registers*)); -REG_EXTERN -int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ +ONIG_EXTERN +int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region)); +ONIG_EXTERN +int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg)); +ONIG_EXTERN +int onig_number_of_names P_((regex_t* reg)); +ONIG_EXTERN +OnigEncoding onig_get_encoding P_((regex_t* reg)); +ONIG_EXTERN +OnigOptionType onig_get_options P_((regex_t* reg)); +ONIG_EXTERN 
+OnigSyntaxType* onig_get_syntax P_((regex_t* reg)); +ONIG_EXTERN +int onig_set_default_syntax P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); +ONIG_EXTERN +void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op)); +ONIG_EXTERN +void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2)); +ONIG_EXTERN +void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)); +ONIG_EXTERN +void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)); +ONIG_EXTERN +int onig_set_meta_char P_((unsigned int what, unsigned int c)); +ONIG_EXTERN +int onig_end P_((void)); +ONIG_EXTERN +const char* onig_version P_((void)); #endif /* ONIGURUMA_H */ diff --git a/ext/mbstring/oniguruma/php_compat.h b/ext/mbstring/oniguruma/php_compat.h index c261cc2bb0..d7f1ad56f1 100644 --- a/ext/mbstring/oniguruma/php_compat.h +++ b/ext/mbstring/oniguruma/php_compat.h @@ -1,46 +1,7 @@ #ifndef _PHP_MBREGEX_COMPAT_H #define _PHP_MBREGEX_COMPAT_H -#define RegCharEncoding php_mb_reg_char_encoding -#define RegRegion php_mb_reg_region -#define RegDefaultCharEncoding php_mb_reg_default_char_encoding -#define REG_MBLEN_TABLE PHP_MBSTR_REG_MBLEN_TABLE -#define RegSyntaxType php_mb_reg_syntax_type -#define RegOptionType php_mb_reg_option_type -#define re_registers php_mb_re_registers -#define RegErrorInfo php_mb_reg_error_info #define re_pattern_buffer php_mb_re_pattern_buffer #define regex_t php_mb_regex_t -#define regex_init php_mb_regex_init -#define regex_new php_mb_regex_new -#define regex_free php_mb_regex_free -#define regex_recompile php_mb_regex_recompile -#define regex_search php_mb_regex_search -#define regex_match php_mb_regex_match -#define regex_region_new php_mb_regex_region_new -#define regex_region_free php_mb_regex_region_free -#define regex_region_copy php_mb_regex_region_copy -#define regex_region_clear php_mb_regex_region_clear -#define regex_region_resize 
php_mb_regex_region_resize -#define regex_name_to_group_numbers php_mb_regex_name_to_group_numbers -#define regex_foreach_names php_mb_regex_foreach_names -#define regex_get_prev_char_head php_mb_regex_get_prev_char_head -#define regex_get_left_adjust_char_head php_mb_get_left_adjust_char_head -#define regex_get_right_adjust_char_head php_mb_get_right_adjust_char_head -#define regex_set_default_trans_table php_mb_get_default_trans_table -#define regex_set_default_syntax php_mb_regex_set_default_syntax -#define regex_end php_mb_regex_end -#define re_mbcinit php_mb_re_mbcinit -#define re_compile_pattern php_mb_re_compile_pattern -#define re_recompile_pattern php_mb_re_recompile_pattern -#define re_free_pattern php_mb_re_free_pattern -#define re_adjust_startpos php_mb_re_adjust_startpos -#define re_search php_mb_re_search -#define re_match php_mb_re_match -#define re_set_casetable php_mb_re_set_casetable -#define php_mbregex_region_copy php_mb_re_copy_registers -#define re_free_registers php_mb_re_free_registers -#define register_info_type php_mb_register_info_type -#define regex_error_code_to_str php_mb_regex_error_code_to_str #endif /* _PHP_MBREGEX_COMPAT_H */ diff --git a/ext/mbstring/oniguruma/re.c.168.patch b/ext/mbstring/oniguruma/re.c.168.patch deleted file mode 100644 index fd1c1bf5d8..0000000000 --- a/ext/mbstring/oniguruma/re.c.168.patch +++ /dev/null @@ -1,56 +0,0 @@ ---- re.c.ruby_orig Tue Feb 4 15:52:29 2003 -+++ re.c Tue Mar 18 19:37:49 2003 -@@ -380,7 +380,8 @@ make_regexp(s, len, flag) - int len, flag; - { - Regexp *rp; -- char *err; -+ char err[REG_MAX_ERROR_MESSAGE_LEN]; -+ int r; - - /* Handle escaped characters first. */ - -@@ -389,16 +390,17 @@ make_regexp(s, len, flag) - from that. 
- */ - -- rp = ALLOC(Regexp); -- MEMZERO((char *)rp, Regexp, 1); -- rp->buffer = ALLOC_N(char, 16); -- rp->allocated = 16; -- rp->fastmap = ALLOC_N(char, 256); -+ r = re_alloc_pattern(&rp); -+ if (r) { -+ re_error_code_to_str(err, r); -+ rb_reg_raise(s, len, err, 0); -+ } -+ - if (flag) { - rp->options = flag; - } -- err = re_compile_pattern(s, len, rp); -- if (err != NULL) { -+ r = re_compile_pattern(s, len, rp, err); -+ if (r != 0) { - rb_reg_raise(s, len, err, 0); - } - -@@ -532,14 +534,14 @@ rb_reg_prepare_re(re) - } - - if (need_recompile) { -- char *err; -+ char err[REG_MAX_ERROR_MESSAGE_LEN]; -+ int r; - - if (FL_TEST(re, KCODE_FIXED)) - kcode_set_option(re); - rb_reg_check(re); -- RREGEXP(re)->ptr->fastmap_accurate = 0; -- err = re_compile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr); -- if (err != NULL) { -+ r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err); -+ if (r != 0) { - rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re); - } - } diff --git a/ext/mbstring/oniguruma/re.c.180.patch b/ext/mbstring/oniguruma/re.c.180.patch deleted file mode 100644 index 08ef2397fb..0000000000 --- a/ext/mbstring/oniguruma/re.c.180.patch +++ /dev/null @@ -1,66 +0,0 @@ ---- re.c.ruby_orig Fri Feb 7 15:35:26 2003 -+++ re.c Tue Mar 18 18:51:21 2003 -@@ -444,7 +444,7 @@ rb_reg_to_s(re) - kcode_set_option(re); - rp = ALLOC(Regexp); - MEMZERO((char *)rp, Regexp, 1); -- err = re_compile_pattern(++ptr, len -= 2, rp) != 0; -+ err = (re_compile_pattern(++ptr, len -= 2, rp, NULL) != 0); - kcode_reset_option(); - re_free_pattern(rp); - } -@@ -538,7 +538,8 @@ make_regexp(s, len, flags) - int flags; - { - Regexp *rp; -- char *err; -+ char err[REG_MAX_ERROR_MESSAGE_LEN]; -+ int r; - - /* Handle escaped characters first. */ - -@@ -547,17 +548,18 @@ make_regexp(s, len, flags) - from that. 
- */ - -- rp = ALLOC(Regexp); -- MEMZERO((char *)rp, Regexp, 1); -- rp->buffer = ALLOC_N(char, 16); -- rp->allocated = 16; -- rp->fastmap = ALLOC_N(char, 256); -+ r = re_alloc_pattern(&rp); -+ if (r) { -+ re_error_code_to_str((UChar* )err, r); -+ rb_reg_raise(s, len, err, 0); -+ } -+ - if (flags) { - rp->options = flags; - } -- err = re_compile_pattern(s, len, rp); -+ r = re_compile_pattern(s, len, rp, err); - -- if (err != NULL) { -+ if (r != 0) { - rb_reg_raise(s, len, err, 0); - } - return rp; -@@ -692,14 +694,14 @@ rb_reg_prepare_re(re) - } - - if (need_recompile) { -- char *err; -+ char err[REG_MAX_ERROR_MESSAGE_LEN]; -+ int r; - - if (FL_TEST(re, KCODE_FIXED)) - kcode_set_option(re); - rb_reg_check(re); -- RREGEXP(re)->ptr->fastmap_accurate = 0; -- err = re_compile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr); -- if (err != NULL) { -+ r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err); -+ if (r != 0) { - rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re); - } - } diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c index fd8e56a7a7..9a89b92ecb 100644 --- a/ext/mbstring/oniguruma/regcomp.c +++ b/ext/mbstring/oniguruma/regcomp.c @@ -2,12 +2,12 @@ regcomp.c - Oniguruma (regular expression library) - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #include "regparse.h" -#ifndef UNALIGNED_WORD_ACCESS +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; #endif @@ -18,83 +18,28 @@ swap_node(Node* a, Node* b) c = *a; *a = *b; *b = c; } -static RegDistance -distance_add(RegDistance d1, RegDistance d2) +static OnigDistance +distance_add(OnigDistance d1, OnigDistance d2) { - if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE) - return INFINITE_DISTANCE; + if (d1 == ONIG_INFINITE_DISTANCE || d2 == 
ONIG_INFINITE_DISTANCE) + return ONIG_INFINITE_DISTANCE; else { - if (d1 <= INFINITE_DISTANCE - d2) return d1 + d2; - else return INFINITE_DISTANCE; + if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; + else return ONIG_INFINITE_DISTANCE; } } -static RegDistance -distance_multiply(RegDistance d, int m) +static OnigDistance +distance_multiply(OnigDistance d, int m) { if (m == 0) return 0; - if (d < INFINITE_DISTANCE / m) + if (d < ONIG_INFINITE_DISTANCE / m) return d * m; else - return INFINITE_DISTANCE; + return ONIG_INFINITE_DISTANCE; } -#if 0 -static RegDistance -distance_distance(RegDistance d1, RegDistance d2) -{ - if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE) - return INFINITE_DISTANCE; - - if (d1 > d2) return d1 - d2; - else return d2 - d1; -} -#endif - -RegCharEncoding RegDefaultCharEncoding = REGCODE_DEFAULT; -static UChar AmbiguityTable[REG_CHAR_TABLE_SIZE]; - -#define IS_AMBIGUITY_CHAR(enc, c) (AmbiguityTable[(c)] >= 2) - -#ifdef DEFAULT_TRANSTABLE_EXIST - -static UChar DTT[] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', 
'\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', -}; -#endif - static int bitset_is_empty(BitSetRef bs) { @@ -105,7 +50,7 @@ bitset_is_empty(BitSetRef bs) return 1; } -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG static int bitset_on_num(BitSetRef bs) { @@ -120,10 +65,10 @@ bitset_on_num(BitSetRef bs) #endif extern int -regex_bbuf_init(BBuf* buf, int size) +onig_bbuf_init(BBuf* buf, int size) { buf->p = (UChar* )xmalloc(size); - if (IS_NULL(buf->p)) return(REGERR_MEMORY); + if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); buf->alloc = size; buf->used = 0; @@ -139,7 +84,7 @@ unset_addr_list_init(UnsetAddrList* uslist, int size) UnsetAddr* p; p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); - CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); uslist->num = 0; uslist->alloc = size; uslist->us = p; @@ -162,7 +107,7 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct 
_Node* node) if (uslist->num >= uslist->alloc) { size = uslist->alloc * 2; p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); - CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); uslist->alloc = size; uslist->us = p; } @@ -175,122 +120,9 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) #endif /* USE_SUBEXP_CALL */ -#ifdef REG_RUBY_M17N - -extern int -regex_is_allow_reverse_match(RegCharEncoding enc, UChar* s, UChar* end) -{ - return IS_INDEPENDENT_TRAIL(enc); -} - -#else /* REG_RUBY_M17N */ - -const char REG_MBLEN_TABLE[][REG_CHAR_TABLE_SIZE] = { - { /* ascii */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - }, - { /* euc-jp */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 - }, - { /* sjis */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 - }, - { /* utf8 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 - } -}; - -extern int -regex_mb_max_length(RegCharEncoding code) -{ - /* can't use switch 
statement, code isn't int type. */ - if (code == REGCODE_ASCII) return 1; - else if (code == REGCODE_EUCJP) return 3; - else if (code == REGCODE_SJIS) return 2; - else return 6; /* REGCODE_UTF8 */ -} - -extern int -regex_is_allow_reverse_match(RegCharEncoding enc, UChar* s, UChar* end) -{ - UChar c; - - if (IS_INDEPENDENT_TRAIL(enc)) return 1; - - c = *s; - if (enc == REGCODE_EUCJP) { - if (c <= 0x7e || c == 0x8e || c == 0x8f) return 1; - } - else if (enc == REGCODE_SJIS) { - if (c <= 0x3f || c == 0x7f) return 1; - } - return 0; -} - -#endif /* not REG_RUBY_M17N */ - +#if 0 static int -bitset_mbmaxlen(BitSetRef bs, int negative, RegCharEncoding enc) +bitset_mbmaxlen(BitSetRef bs, int negative, OnigEncoding enc) { int i; int len, maxlen = 0; @@ -298,7 +130,7 @@ bitset_mbmaxlen(BitSetRef bs, int negative, RegCharEncoding enc) if (negative) { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (! BITSET_AT(bs, i)) { - len = mblen(enc, i); + len = enc_len(enc, i); if (len > maxlen) maxlen = len; } } @@ -306,14 +138,14 @@ bitset_mbmaxlen(BitSetRef bs, int negative, RegCharEncoding enc) else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (BITSET_AT(bs, i)) { - len = mblen(enc, i); + len = enc_len(enc, i); if (len > maxlen) maxlen = len; } } } return maxlen; } - +#endif static int add_opcode(regex_t* reg, int opcode) @@ -327,7 +159,7 @@ add_rel_addr(regex_t* reg, int addr) { RelAddrType ra = (RelAddrType )addr; -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS BBUF_ADD(reg, &ra, SIZE_RELADDR); #else UChar buf[SERIALIZE_BUFSIZE]; @@ -342,7 +174,7 @@ add_abs_addr(regex_t* reg, int addr) { AbsAddrType ra = (AbsAddrType )addr; -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS BBUF_ADD(reg, &ra, SIZE_ABSADDR); #else UChar buf[SERIALIZE_BUFSIZE]; @@ -357,7 +189,7 @@ add_length(regex_t* reg, int len) { LengthType l = (LengthType )len; -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS BBUF_ADD(reg, &l, SIZE_LENGTH); #else UChar 
buf[SERIALIZE_BUFSIZE]; @@ -372,7 +204,7 @@ add_mem_num(regex_t* reg, int num) { MemNumType n = (MemNumType )num; -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS BBUF_ADD(reg, &n, SIZE_MEMNUM); #else UChar buf[SERIALIZE_BUFSIZE]; @@ -388,7 +220,7 @@ add_repeat_num(regex_t* reg, int num) { RepeatNumType n = (RepeatNumType )num; -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS BBUF_ADD(reg, &n, SIZE_REPEATNUM); #else UChar buf[SERIALIZE_BUFSIZE]; @@ -400,9 +232,9 @@ add_repeat_num(regex_t* reg, int num) #endif static int -add_option(regex_t* reg, RegOptionType option) +add_option(regex_t* reg, OnigOptionType option) { -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS BBUF_ADD(reg, &option, SIZE_OPTION); #else UChar buf[SERIALIZE_BUFSIZE]; @@ -438,7 +270,7 @@ add_bitset(regex_t* reg, BitSetRef bs) } static int -add_opcode_option(regex_t* reg, int opcode, RegOptionType option) +add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) { int r; @@ -502,12 +334,12 @@ select_str_opcode(int mb_len, int str_len, int ignore_case) } static int -compile_tree_empty_check(Node* node, regex_t* reg, int empty_check) +compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) { int r; int saved_num_null_check = reg->num_null_check; - if (empty_check) { + if (empty_info != 0) { r = add_opcode(reg, OP_NULL_CHECK_START); if (r) return r; r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ @@ -518,8 +350,14 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_check) r = compile_tree(node, reg); if (r) return r; - if (empty_check) { - r = add_opcode(reg, OP_NULL_CHECK_END); + if (empty_info != 0) { + if (empty_info == NQ_TARGET_IS_EMPTY) + r = add_opcode(reg, OP_NULL_CHECK_END); + else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); + else if (empty_info == NQ_TARGET_IS_EMPTY_REC) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); + if (r) return 
r; r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ } @@ -594,7 +432,7 @@ static int compile_length_string_node(StrNode* sn, regex_t* reg) { int rlen, r, len, prev_len, slen, ambig, ic; - RegCharEncoding code = reg->enc; + OnigEncoding enc = reg->enc; UChar *p, *prev; if (sn->end <= sn->s) @@ -603,9 +441,9 @@ compile_length_string_node(StrNode* sn, regex_t* reg) ic = IS_IGNORECASE(reg->options); p = prev = sn->s; - prev_len = mblen(code, *p); + prev_len = enc_len(enc, *p); if (ic != 0 && prev_len == 1) - ambig = IS_AMBIGUITY_CHAR(reg->enc, *p); + ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p); else ambig = 0; @@ -614,18 +452,18 @@ compile_length_string_node(StrNode* sn, regex_t* reg) rlen = 0; for (; p < sn->end; ) { - len = mblen(code, *p); + len = enc_len(enc, *p); if (len == prev_len) { slen++; if (ic != 0 && ambig == 0 && len == 1) - ambig = IS_AMBIGUITY_CHAR(reg->enc, *p); + ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p); } else { r = add_compile_string_length(prev, prev_len, slen, reg, ambig); rlen += r; if (ic != 0 && len == 1) - ambig = IS_AMBIGUITY_CHAR(reg->enc, *p); + ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p); else ambig = 0; @@ -654,7 +492,7 @@ static int compile_string_node(StrNode* sn, regex_t* reg) { int r, len, prev_len, slen, ambig, ic; - RegCharEncoding code = reg->enc; + OnigEncoding enc = reg->enc; UChar *p, *prev; if (sn->end <= sn->s) @@ -663,10 +501,11 @@ compile_string_node(StrNode* sn, regex_t* reg) ic = IS_IGNORECASE(reg->options); p = prev = sn->s; - prev_len = mblen(code, *p); + prev_len = enc_len(enc, *p); if (ic != 0 && prev_len == 1) { - ambig = IS_AMBIGUITY_CHAR(reg->enc, *p); - if (ambig != 0) *p = TOLOWER(reg->enc, *p); + ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p); + if (ambig != 0) + ONIGENC_MBC_TO_LOWER(reg->enc, p, p); } else ambig = 0; @@ -675,21 +514,21 @@ compile_string_node(StrNode* sn, regex_t* reg) slen = 1; for (; p < sn->end; ) { - len = mblen(code, *p); + len = enc_len(enc, *p); if (len == 
prev_len) { slen++; if (ic != 0 && len == 1) { if (ambig == 0) - ambig = IS_AMBIGUITY_CHAR(reg->enc, *p); - if (ambig != 0) *p = TOLOWER(reg->enc, *p); + ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p); + if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p); } } else { r = add_compile_string(prev, prev_len, slen, reg, ambig); if (r) return r; if (ic != 0 && len == 1) { - ambig = IS_AMBIGUITY_CHAR(reg->enc, *p); - if (ambig != 0) *p = TOLOWER(reg->enc, *p); + ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p); + if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p); } else ambig = 0; @@ -714,20 +553,20 @@ compile_string_raw_node(StrNode* sn, regex_t* reg) } static int -add_multi_byte_cclass_offset(BBuf* mbuf, regex_t* reg, int offset) +add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) { -#ifdef UNALIGNED_WORD_ACCESS - add_length(reg, mbuf->used - offset); - return add_bytes(reg, mbuf->p + offset, mbuf->used - offset); +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + add_length(reg, mbuf->used); + return add_bytes(reg, mbuf->p, mbuf->used); #else int r, pad_size; UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; GET_ALIGNMENT_PAD_SIZE(p, pad_size); - add_length(reg, mbuf->used - offset + (WORD_ALIGNMENT_SIZE - 1)); + add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); - r = add_bytes(reg, mbuf->p + offset, mbuf->used - offset); + r = add_bytes(reg, mbuf->p, mbuf->used); /* padding for return value from compile_length_cclass_node() to be fix. */ pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; @@ -747,12 +586,12 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg) else { if (bitset_is_empty(cc->bs)) { /* SIZE_BITSET is included in mbuf->used. 
*/ - len = SIZE_OPCODE - SIZE_BITSET; + len = SIZE_OPCODE; } else { - len = SIZE_OPCODE; + len = SIZE_OPCODE + SIZE_BITSET; } -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS len += SIZE_LENGTH + cc->mbuf->used; #else len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); @@ -778,7 +617,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) if (cc->not) add_opcode(reg, OP_CCLASS_MB_NOT); else add_opcode(reg, OP_CCLASS_MB); - r = add_multi_byte_cclass_offset(cc->mbuf, reg, SIZE_BITSET); + r = add_multi_byte_cclass(cc->mbuf, reg); } else { if (cc->not) add_opcode(reg, OP_CCLASS_MIX_NOT); @@ -786,7 +625,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) r = add_bitset(reg, cc->bs); if (r) return r; - r = add_multi_byte_cclass_offset(cc->mbuf, reg, SIZE_BITSET); + r = add_multi_byte_cclass(cc->mbuf, reg); } } @@ -798,20 +637,20 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) { #define REPEAT_RANGE_ALLOC 4 - RegRepeatRange* p; + OnigRepeatRange* p; if (reg->repeat_range_alloc == 0) { - p = (RegRepeatRange* )xmalloc(sizeof(RegRepeatRange) * REPEAT_RANGE_ALLOC); - CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY); + p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); reg->repeat_range = p; reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; } else if (reg->repeat_range_alloc <= id) { int n; n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; - p = (RegRepeatRange* )xrealloc(reg->repeat_range, - sizeof(RegRepeatRange) * n); - CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY); + p = (OnigRepeatRange* )xrealloc(reg->repeat_range, + sizeof(OnigRepeatRange) * n); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); reg->repeat_range = p; reg->repeat_range_alloc = n; } @@ -825,7 +664,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) } static int -compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_check, +compile_range_repeat_node(QualifierNode* qn, int 
target_len, int empty_info, regex_t* reg) { int r; @@ -842,7 +681,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_check, r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_check); + r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); @@ -858,7 +697,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) { int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_check = (infinite && qn->target_may_empty); + int empty_info = qn->target_empty_info; int tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; @@ -873,7 +712,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) } } - if (empty_check) + if (empty_info != 0) mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); else mod_tlen = tlen; @@ -932,7 +771,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) { int i, r, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_check = (infinite && qn->target_may_empty); + int empty_info = qn->target_empty_info; int tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; @@ -941,15 +780,22 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; if (IS_NOT_NULL(qn->next_head_exact)) { - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); if (r) return r; return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); } - else - return add_opcode(reg, OP_ANYCHAR_STAR); + else { + if (IS_MULTILINE(reg->options)) + return add_opcode(reg, OP_ANYCHAR_ML_STAR); + else + return add_opcode(reg, OP_ANYCHAR_STAR); + } } - if (empty_check) + if (empty_info != 0) 
mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); else mod_tlen = tlen; @@ -981,7 +827,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) mod_tlen + SIZE_OP_JUMP); if (r) return r; add_bytes(reg, NSTRING(qn->head_exact).s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_check); + r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + SIZE_OP_JUMP + SIZE_OP_PUSH_OR_JUMP_EXACT1)); @@ -991,7 +837,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) mod_tlen + SIZE_OP_JUMP); if (r) return r; add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_check); + r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + SIZE_OP_JUMP + SIZE_OP_PUSH_IF_PEEK_NEXT)); @@ -999,7 +845,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) else { r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_check); + r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + SIZE_OP_JUMP + SIZE_OP_PUSH)); @@ -1008,7 +854,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) else { r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_check); + r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + SIZE_OP_PUSH)); } @@ -1041,7 +887,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) r = compile_tree(qn->target, reg); } else { - r = compile_range_repeat_node(qn, mod_tlen, empty_check, reg); + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); } return r; } @@ -1050,7 +896,7 @@ static int compile_length_option_node(EffectNode* node, regex_t* reg) 
{ int tlen; - RegOptionType prev = reg->options; + OnigOptionType prev = reg->options; reg->options = node->option; tlen = compile_length_tree(node->target, reg); @@ -1058,29 +904,39 @@ compile_length_option_node(EffectNode* node, regex_t* reg) if (tlen < 0) return tlen; - return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL - + tlen + SIZE_OP_SET_OPTION; + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL + + tlen + SIZE_OP_SET_OPTION; + } + else + return tlen; } static int compile_option_node(EffectNode* node, regex_t* reg) { int r; - RegOptionType prev = reg->options; - - r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); - if (r) return r; - r = add_opcode_option(reg, OP_SET_OPTION, prev); - if (r) return r; - r = add_opcode(reg, OP_FAIL); - if (r) return r; + OnigOptionType prev = reg->options; - reg->options = node->option; - r = compile_tree(node->target, reg); - reg->options = prev; - if (r) return r; + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + if (r) return r; + r = add_opcode(reg, OP_FAIL); + if (r) return r; - r = add_opcode_option(reg, OP_SET_OPTION, prev); + reg->options = node->option; + r = compile_tree(node->target, reg); + reg->options = prev; + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + } + else { + reg->options = node->option; + r = compile_tree(node->target, reg); + reg->options = prev; + } return r; } @@ -1106,7 +962,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) if (IS_EFFECT_CALLED(node)) { len = SIZE_OP_MEMORY_START_PUSH + tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; - if (IS_FIND_CONDITION(reg->options)) + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) len += (IS_EFFECT_RECURSION(node) ? 
SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else @@ -1116,12 +972,12 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) else #endif { - if (BIT_STATUS_AT(reg->backtrack_mem, node->regnum)) + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) len = SIZE_OP_MEMORY_START_PUSH; else len = SIZE_OP_MEMORY_START; - len += tlen + (IS_FIND_CONDITION(reg->options) + len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); } break; @@ -1141,7 +997,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) break; default: - return REGERR_TYPE_BUG; + return ONIGERR_TYPE_BUG; break; } @@ -1170,7 +1026,7 @@ compile_effect_node(EffectNode* node, regex_t* reg) if (r) return r; len = compile_length_tree(node->target, reg); len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); - if (IS_FIND_CONDITION(reg->options)) + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) len += (IS_EFFECT_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else @@ -1181,7 +1037,7 @@ compile_effect_node(EffectNode* node, regex_t* reg) if (r) return r; } #endif - if (BIT_STATUS_AT(reg->backtrack_mem, node->regnum)) + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) r = add_opcode(reg, OP_MEMORY_START_PUSH); else r = add_opcode(reg, OP_MEMORY_START); @@ -1192,7 +1048,7 @@ compile_effect_node(EffectNode* node, regex_t* reg) if (r) return r; #ifdef USE_SUBEXP_CALL if (IS_EFFECT_CALLED(node)) { - if (IS_FIND_CONDITION(reg->options)) + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) r = add_opcode(reg, (IS_EFFECT_RECURSION(node) ? 
OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); else @@ -1207,7 +1063,7 @@ compile_effect_node(EffectNode* node, regex_t* reg) else #endif { - if (IS_FIND_CONDITION(reg->options)) + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) r = add_opcode(reg, OP_MEMORY_END_PUSH); else r = add_opcode(reg, OP_MEMORY_END); @@ -1244,7 +1100,7 @@ compile_effect_node(EffectNode* node, regex_t* reg) break; default: - return REGERR_TYPE_BUG; + return ONIGERR_TYPE_BUG; break; } @@ -1329,7 +1185,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg) if (r) return r; if (node->char_len < 0) { r = get_char_length_tree(node->target, reg, &n); - if (r) return REGERR_INVALID_LOOK_BEHIND_PATTERN; + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } else n = node->char_len; @@ -1348,7 +1204,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg) if (r) return r; if (node->char_len < 0) { r = get_char_length_tree(node->target, reg, &n); - if (r) return REGERR_INVALID_LOOK_BEHIND_PATTERN; + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } else n = node->char_len; @@ -1361,7 +1217,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg) break; default: - return REGERR_TYPE_BUG; + return ONIGERR_TYPE_BUG; break; } @@ -1419,7 +1275,7 @@ compile_length_tree(Node* node, regex_t* reg) BackrefNode* br = &(NBACKREF(node)); if (br->back_num == 1) { - r = (br->back_static[0] <= 3 + r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3) ? 
SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); } else { @@ -1447,7 +1303,7 @@ compile_length_tree(Node* node, regex_t* reg) break; default: - return REGERR_TYPE_BUG; + return ONIGERR_TYPE_BUG; break; } @@ -1514,12 +1370,8 @@ compile_tree(Node* node, regex_t* reg) switch (NCTYPE(node).type) { case CTYPE_WORD: op = OP_WORD; break; case CTYPE_NOT_WORD: op = OP_NOT_WORD; break; -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_SB: op = OP_WORD_SB; break; - case CTYPE_WORD_MB: op = OP_WORD_MB; break; -#endif default: - return REGERR_TYPE_BUG; + return ONIGERR_TYPE_BUG; break; } r = add_opcode(reg, op); @@ -1527,7 +1379,10 @@ compile_tree(Node* node, regex_t* reg) break; case N_ANYCHAR: - r = add_opcode(reg, OP_ANYCHAR); + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML); + else + r = add_opcode(reg, OP_ANYCHAR); break; case N_BACKREF: @@ -1537,20 +1392,28 @@ compile_tree(Node* node, regex_t* reg) if (br->back_num == 1) { n = br->back_static[0]; - switch (n) { - case 1: r = add_opcode(reg, OP_BACKREF1); break; - case 2: r = add_opcode(reg, OP_BACKREF2); break; - case 3: r = add_opcode(reg, OP_BACKREF3); break; - default: - r = add_opcode(reg, OP_BACKREFN); + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREFN_IC); if (r) return r; r = add_mem_num(reg, n); - break; + } + else { + switch (n) { + case 1: r = add_opcode(reg, OP_BACKREF1); break; + case 2: r = add_opcode(reg, OP_BACKREF2); break; + case 3: r = add_opcode(reg, OP_BACKREF3); break; + default: + r = add_opcode(reg, OP_BACKREFN); + if (r) return r; + r = add_mem_num(reg, n); + break; + } } } else { int* p; - add_opcode(reg, OP_BACKREF_MULTI); + add_opcode(reg, (IS_IGNORECASE(reg->options) ? 
+ OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI)); if (r) return r; add_length(reg, br->back_num); if (r) return r; @@ -1582,7 +1445,7 @@ compile_tree(Node* node, regex_t* reg) break; default: -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); #endif break; @@ -1591,6 +1454,194 @@ compile_tree(Node* node, regex_t* reg) return r; } +#ifdef USE_NAMED_GROUP +typedef struct { + int new_val; +} NumMap; + +static int +noname_disable_map(Node** plink, NumMap* map, int* counter) +{ + int r = 0; + Node* node = *plink; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + do { + r = noname_disable_map(&(NCONS(node).left), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUALIFIER: + { + Node** ptarget = &(NQUALIFIER(node).target); + Node* old = *ptarget; + r = noname_disable_map(ptarget, map, counter); + if (*ptarget != old && NTYPE(*ptarget) == N_QUALIFIER) { + onig_reduce_nested_qualifier(node, *ptarget); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + if (en->type == EFFECT_MEMORY) { + if (IS_EFFECT_NAMED_GROUP(en)) { + (*counter)++; + map[en->regnum].new_val = *counter; + en->regnum = *counter; + r = noname_disable_map(&(en->target), map, counter); + } + else { + *plink = en->target; + en->target = NULL_NODE; + onig_node_free(node); + r = noname_disable_map(plink, map, counter); + } + } + else + r = noname_disable_map(&(en->target), map, counter); + } + break; + + default: + break; + } + + return r; +} + +static int +renumber_node_backref(Node* node, NumMap* map) +{ + int i, pos, n, old_num; + int *backs; + BackrefNode* bn = &(NBACKREF(node)); + + if (! 
IS_BACKREF_NAME_REF(bn)) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + + old_num = bn->back_num; + if (IS_NULL(bn->back_dynamic)) + backs = bn->back_static; + else + backs = bn->back_dynamic; + + for (i = 0, pos = 0; i < old_num; i++) { + n = map[backs[i]].new_val; + if (n > 0) { + backs[pos] = n; + pos++; + } + } + + bn->back_num = pos; + return 0; +} + +static int +renumber_by_map(Node* node, NumMap* map) +{ + int r = 0; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + do { + r = renumber_by_map(NCONS(node).left, map); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + case N_QUALIFIER: + r = renumber_by_map(NQUALIFIER(node).target, map); + break; + case N_EFFECT: + r = renumber_by_map(NEFFECT(node).target, map); + break; + + case N_BACKREF: + r = renumber_node_backref(node, map); + break; + + default: + break; + } + + return r; +} + +static int +numbered_ref_check(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + do { + r = numbered_ref_check(NCONS(node).left); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + case N_QUALIFIER: + r = numbered_ref_check(NQUALIFIER(node).target); + break; + case N_EFFECT: + r = numbered_ref_check(NEFFECT(node).target); + break; + + case N_BACKREF: + if (! 
IS_BACKREF_NAME_REF(&(NBACKREF(node)))) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + break; + + default: + break; + } + + return r; +} + +static int +disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) +{ + int r, i, pos, counter; + BitStatusType loc; + NumMap* map; + + map = (NumMap* )xalloca(sizeof(NumMap) * (env->num_mem + 1)); + CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY); + for (i = 1; i <= env->num_mem; i++) { + map[i].new_val = 0; + } + counter = 0; + r = noname_disable_map(root, map, &counter); + if (r != 0) return r; + + r = renumber_by_map(*root, map); + if (r != 0) return r; + + for (i = 1, pos = 1; i <= env->num_mem; i++) { + if (map[i].new_val > 0) { + SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; + pos++; + } + } + + loc = env->capture_history; + BIT_STATUS_CLEAR(env->capture_history); + for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(loc, i)) { + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); + } + } + + env->num_mem = env->num_named; + reg->num_mem = env->num_named; + return 0; +} +#endif /* USE_NAMED_GROUP */ + #ifdef USE_SUBEXP_CALL static int unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) @@ -1598,17 +1649,17 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) int i, offset; EffectNode* en; AbsAddrType addr; -#ifndef UNALIGNED_WORD_ACCESS +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS UChar buf[SERIALIZE_BUFSIZE]; #endif for (i = 0; i < uslist->num; i++) { en = &(NEFFECT(uslist->us[i].target)); - if (! IS_EFFECT_ADDR_FIXED(en)) return REGERR_PARSER_BUG; + if (! 
IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; addr = en->call_addr; offset = uslist->us[i].offset; -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); #else SERIALIZE_ABSADDR(addr, buf); @@ -1619,10 +1670,79 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) } #endif +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK static int -get_min_match_length(Node* node, RegDistance *min, ScanEnv* env) +qualifiers_memory_node_info(Node* node) { - RegDistance tmin; + int r = 0; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + { + int v; + do { + v = qualifiers_memory_node_info(NCONS(node).left); + if (v > r) r = v; + } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (IS_CALL_RECURSION(&NCALL(node))) { + return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ + } + else + r = qualifiers_memory_node_info(NCALL(node).target); + break; +#endif + + case N_QUALIFIER: + { + QualifierNode* qn = &(NQUALIFIER(node)); + if (qn->upper != 0) { + r = qualifiers_memory_node_info(qn->target); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_MEMORY: + return NQ_TARGET_IS_EMPTY_MEM; + break; + + case EFFECT_OPTION: + case EFFECT_STOP_BACKTRACK: + r = qualifiers_memory_node_info(en->target); + break; + default: + break; + } + } + break; + + case N_BACKREF: + case N_STRING: + case N_CTYPE: + case N_CCLASS: + case N_ANYCHAR: + case N_ANCHOR: + default: + break; + } + + return r; +} +#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */ + +static int +get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) +{ + OnigDistance tmin; int r = 0; *min = 0; @@ -1636,11 +1756,11 @@ get_min_match_length(Node* node, RegDistance *min, ScanEnv* env) if (br->state & NST_RECURSION) break; backs = BACKREFS_P(br); - if (backs[0] > env->num_mem) return REGERR_INVALID_BACKREF; + if 
(backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; r = get_min_match_length(nodes[backs[0]], min, env); if (r != 0) break; for (i = 1; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return REGERR_INVALID_BACKREF; + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; r = get_min_match_length(nodes[backs[i]], &tmin, env); if (r != 0) break; if (*min > tmin) *min = tmin; @@ -1692,10 +1812,6 @@ get_min_match_length(Node* node, RegDistance *min, ScanEnv* env) switch (NCTYPE(node).type) { case CTYPE_WORD: *min = 1; break; case CTYPE_NOT_WORD: *min = 1; break; -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_SB: *min = 1; break; - case CTYPE_WORD_MB: *min = 2; break; -#endif default: break; } @@ -1752,9 +1868,9 @@ get_min_match_length(Node* node, RegDistance *min, ScanEnv* env) } static int -get_max_match_length(Node* node, RegDistance *max, ScanEnv* env) +get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) { - RegDistance tmax; + OnigDistance tmax; int r = 0; *max = 0; @@ -1785,18 +1901,9 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env) switch (NCTYPE(node).type) { case CTYPE_WORD: case CTYPE_NOT_WORD: -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_MB: -#endif - *max = mbmaxlen_dist(env->enc); + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_SB: - *max = 1; - break; -#endif - default: break; } @@ -1804,7 +1911,7 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env) case N_CCLASS: case N_ANYCHAR: - *max = mbmaxlen_dist(env->enc); + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; case N_BACKREF: @@ -1814,12 +1921,12 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env) Node** nodes = SCANENV_MEM_NODES(env); BackrefNode* br = &(NBACKREF(node)); if (br->state & NST_RECURSION) { - *max = INFINITE_DISTANCE; + *max = ONIG_INFINITE_DISTANCE; break; } backs = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return 
REGERR_INVALID_BACKREF; + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; r = get_max_match_length(nodes[backs[i]], &tmax, env); if (r != 0) break; if (*max < tmax) *max = tmax; @@ -1832,7 +1939,7 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env) if (! IS_CALL_RECURSION(&(NCALL(node)))) r = get_max_match_length(NCALL(node).target, max, env); else - *max = INFINITE_DISTANCE; + *max = ONIG_INFINITE_DISTANCE; break; #endif @@ -1846,7 +1953,7 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env) if (! IS_REPEAT_INFINITE(qn->upper)) *max = distance_multiply(*max, qn->upper); else - *max = INFINITE_DISTANCE; + *max = ONIG_INFINITE_DISTANCE; } } } @@ -1937,7 +2044,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) StrNode* sn = &(NSTRING(node)); UChar *s = sn->s; while (s < sn->end) { - s += mblen(reg->enc, *s); + s += enc_len(reg->enc, *s); (*len)++; } } @@ -1969,10 +2076,6 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) switch (NCTYPE(node).type) { case CTYPE_WORD: case CTYPE_NOT_WORD: -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_SB: - case CTYPE_WORD_MB: -#endif *len = 1; break; } @@ -2027,12 +2130,35 @@ get_char_length_tree(Node* node, regex_t* reg, int* len) return get_char_length_tree1(node, reg, len, 0); } +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int found; + + if (code >= SINGLE_BYTE_SIZE) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } + } + else { + found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); + } + + if (cc->not == 0) + return found; + else + return !found; +} + /* x is not included y ==> 1 : 0 */ static int is_not_included(Node* x, Node* y, regex_t* reg) { int i, len; - WCINT wc; + OnigCodePoint code; UChar *p, c; int ytype; @@ -2056,11 +2182,6 @@ is_not_included(Node* x, Node* y, regex_t* reg) else return 0; break; -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_SB: - case CTYPE_WORD_MB: - break; -#endif default: break; } @@ -2095,7 +2216,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) if (IS_NULL(xc->mbuf) && xc->not == 0) { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (BITSET_AT(xc->bs, i)) { - if (IS_SB_WORD(reg->enc, i)) return 0; + if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0; } } return 1; @@ -2104,7 +2225,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) break; case CTYPE_NOT_WORD: for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! IS_SB_WORD(reg->enc, i)) { + if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) { if (xc->not == 0) { if (BITSET_AT(xc->bs, i)) return 0; @@ -2118,11 +2239,6 @@ is_not_included(Node* x, Node* y, regex_t* reg) return 1; break; -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_SB: - case CTYPE_WORD_MB: - break; -#endif default: break; } @@ -2169,19 +2285,11 @@ is_not_included(Node* x, Node* y, regex_t* reg) case N_CTYPE: switch (NCTYPE(y).type) { case CTYPE_WORD: - return (IS_WORD_STR(reg->enc, xs->s, xs->end) ? 0 : 1); + return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1); break; case CTYPE_NOT_WORD: - return (IS_WORD_STR(reg->enc, xs->s, xs->end) ? 1 : 0); - break; -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_SB: - return (ismb(reg->enc, c) ? 1 : 0); + return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0); break; - case CTYPE_WORD_MB: - return (ismb(reg->enc, c) ? 0 : 1); - break; -#endif default: break; } @@ -2190,25 +2298,10 @@ is_not_included(Node* x, Node* y, regex_t* reg) case N_CCLASS: { CClassNode* cc = &(NCCLASS(y)); - if (ismb(reg->enc, c)) { - if (IS_NULL(cc->mbuf)) - return (cc->not == 0 ? 
1 : 0); - else { - len = mblen(reg->enc, c); - wc = MB2WC(xs->s, xs->s + len, reg->enc); - p = cc->mbuf->p + SIZE_BITSET; - if (regex_is_in_wc_range(p, wc)) - return (cc->not == 0 ? 0 : 1); - else - return (cc->not == 0 ? 1 : 0); - } - } - else { - if (BITSET_AT(cc->bs, c) == 0) - return (cc->not == 0 ? 1 : 0); - else - return (cc->not == 0 ? 0 : 1); - } + + code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, + xs->s + enc_len(reg->enc, c)); + return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); } break; @@ -2219,9 +2312,16 @@ is_not_included(Node* x, Node* y, regex_t* reg) len = NSTRING_LEN(x); if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); if (NSTRING_IS_CASE_AMBIG(x) || NSTRING_IS_CASE_AMBIG(y)) { - for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { - if (TOLOWER(reg->enc, *p) != TOLOWER(reg->enc, *q)) - return 1; + UChar plow[ONIGENC_MBC_TO_LOWER_MAXLEN]; + UChar qlow[ONIGENC_MBC_TO_LOWER_MAXLEN]; + int plen, qlen; + for (p = ys->s, q = xs->s; q < xs->end; ) { + plen = ONIGENC_MBC_TO_LOWER(reg->enc, p, plow); + qlen = ONIGENC_MBC_TO_LOWER(reg->enc, q, qlow); + if (plen != qlen || onig_strncmp(plow, qlow, plen) != 0) + return 1; + p += enc_len(reg->enc, *p); + q += enc_len(reg->enc, *q); } } else { @@ -2279,7 +2379,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) if (exact != 0 && !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { - if (! IS_AMBIGUITY_CHAR(reg->enc, *(sn->s))) + if (! 
ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, sn->s)) n = node; } else { @@ -2306,7 +2406,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) switch (en->type) { case EFFECT_OPTION: { - RegOptionType options = reg->options; + OnigOptionType options = reg->options; reg->options = NEFFECT(node).option; n = get_head_value_node(NEFFECT(node).target, exact, reg); @@ -2398,7 +2498,7 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) case N_LIST: { Node *x; - RegDistance min; + OnigDistance min; int ret; x = node; @@ -2508,7 +2608,7 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) if (IS_EFFECT_RECURSION(en)) { SET_EFFECT_STATUS(node, NST_MARK1); r = subexp_inf_recursive_check(en->target, env, 1); - if (r > 0) return REGERR_NEVER_ENDING_RECURSION; + if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; CLEAR_EFFECT_STATUS(node, NST_MARK1); } r = subexp_inf_recursive_check_trav(en->target, env); @@ -2684,36 +2784,51 @@ setup_subexp_call(Node* node, ScanEnv* env) CallNode* cn = &(NCALL(node)); Node** nodes = SCANENV_MEM_NODES(env); -#ifdef USE_NAMED_SUBEXP - n = regex_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs); +#ifdef USE_NAMED_GROUP + n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs); #else - n = REGERR_UNDEFINED_GROUP_REFERENCE; + n = -1; #endif if (n <= 0) { /* name not found, check group number. 
(?*ddd) */ p = cn->name; - num = regex_scan_unsigned_number(&p, cn->name_end, env->enc); + num = onig_scan_unsigned_number(&p, cn->name_end, env->enc); if (num <= 0 || p != cn->name_end) { - regex_scan_env_set_error_string(env, - REGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return REGERR_UNDEFINED_NAME_REFERENCE; + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } +#ifdef USE_NAMED_GROUP + if (env->num_named > 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + } +#endif + if (num > env->num_mem) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_GROUP_REFERENCE; } - if (num > env->num_mem) return REGERR_UNDEFINED_GROUP_REFERENCE; cn->ref_num = num; goto set_call_attr; } else if (n > 1) { - regex_scan_env_set_error_string(env, - REGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); - return REGERR_MULTIPLEX_DEFINITION_NAME_CALL; + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; } else { cn->ref_num = refs[0]; set_call_attr: cn->target = nodes[cn->ref_num]; - if (IS_NULL(cn->target)) return REGERR_INVALID_SUBEXP_NAME; + if (IS_NULL(cn->target)) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } SET_EFFECT_STATUS(cn->target, NST_CALLED); - BIT_STATUS_ON_AT(env->backtrack_mem, cn->ref_num); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num); cn->unset_addr_list = env->unset_addr_list; } } @@ -2762,8 +2877,8 @@ divide_look_behind_alternatives(Node* node) np = node; while ((np = NCONS(np).right) != NULL_NODE) { - insert_node = 
regex_node_new_anchor(anc_type); - CHECK_NULL_RETURN_VAL(insert_node, REGERR_MEMORY); + insert_node = onig_node_new_anchor(anc_type); + CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY); NANCHOR(insert_node).target = NCONS(np).left; NCONS(np).left = insert_node; } @@ -2787,12 +2902,12 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) if (r == 0) an->char_len = len; else if (r == GET_CHAR_LEN_VARLEN) - r = REGERR_INVALID_LOOK_BEHIND_PATTERN; + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { - if (IS_SYNTAX_BV(env->syntax, REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) r = divide_look_behind_alternatives(node); else - r = REGERR_INVALID_LOOK_BEHIND_PATTERN; + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } return r; @@ -2820,8 +2935,8 @@ next_setup(Node* node, Node* next_node, regex_t* reg) if (IS_NOT_NULL(x)) { y = get_head_value_node(next_node, 0, reg); if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { - Node* en = regex_node_new_effect(EFFECT_STOP_BACKTRACK); - CHECK_NULL_RETURN_VAL(en, REGERR_MEMORY); + Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK); + CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); SET_EFFECT_STATUS(en, NST_SIMPLE_REPEAT); swap_node(node, en); NEFFECT(node).target = en; @@ -2846,7 +2961,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg) #define IN_REPEAT (1<<2) /* setup_tree does the following work. - 1. check empty loop. (set qn->target_may_empty) + 1. check empty loop. (set qn->target_empty_info) 2. expand ignore-case in char class. 3. set memory status bit flags. (reg->mem_stats) 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 
@@ -2882,13 +2997,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case N_CCLASS: if (IS_IGNORECASE(reg->options)) { - int c, t; + int i; + UChar c, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; BitSetRef bs = NCCLASS(node).bs; - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - t = TOLOWER(reg->enc, c); - if (t != c) { - if (BITSET_AT(bs, c)) BITSET_SET_BIT(bs, t); - if (BITSET_AT(bs, t)) BITSET_SET_BIT(bs, c); + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + c = (UChar )i; + ONIGENC_MBC_TO_LOWER(reg->enc, &c, lowbuf); + if (*lowbuf != c) { + if (BITSET_AT(bs, c)) BITSET_SET_BIT(bs, *lowbuf); + if (BITSET_AT(bs, *lowbuf)) BITSET_SET_BIT(bs, c); } } } @@ -2900,7 +3017,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) UChar* p = sn->s; while (p < sn->end) { - if (IS_AMBIGUITY_CHAR(reg->enc, *p)) { + if (ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p)) { NSTRING_SET_CASE_AMBIG(node); break; } @@ -2926,9 +3043,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) BackrefNode* br = &(NBACKREF(node)); p = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { - if (p[i] > env->num_mem) return REGERR_INVALID_BACKREF; + if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); - BIT_STATUS_ON_AT(env->backtrack_mem, p[i]); + BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED); } } @@ -2936,7 +3053,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case N_QUALIFIER: { - RegDistance d; + OnigDistance d; QualifierNode* qn = &(NQUALIFIER(node)); Node* target = qn->target; @@ -2944,7 +3061,14 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) r = get_min_match_length(target, &d, env); if (r) break; if (d == 0) { - qn->target_may_empty = 1; + qn->target_empty_info = NQ_TARGET_IS_EMPTY; +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + r = qualifiers_memory_node_info(target); + if (r < 0) break; + if (r > 0) { + qn->target_empty_info = r; + } 
+#endif #if 0 r = get_max_match_length(target, &d, env); if (r == 0 && d == 0) { @@ -2974,19 +3098,19 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { int i, n = qn->lower; - regex_node_conv_to_str_node(node, NSTRING(target).flag); + onig_node_conv_to_str_node(node, NSTRING(target).flag); for (i = 0; i < n; i++) { - r = regex_node_str_cat(node, sn->s, sn->end); + r = onig_node_str_cat(node, sn->s, sn->end); if (r) break; } - regex_node_free(target); + onig_node_free(target); break; /* break case N_QUALIFIER: */ } } } #ifdef USE_OP_PUSH_OR_JUMP_EXACT - if (qn->greedy && !qn->target_may_empty) { + if (qn->greedy && (qn->target_empty_info != 0)) { if (NTYPE(target) == N_QUALIFIER) { QualifierNode* tqn = &(NQUALIFIER(target)); if (IS_NOT_NULL(tqn->head_exact)) { @@ -3009,7 +3133,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) switch (en->type) { case EFFECT_OPTION: { - RegOptionType options = reg->options; + OnigOptionType options = reg->options; reg->options = NEFFECT(node).option; r = setup_tree(NEFFECT(node).target, reg, state, env); reg->options = options; @@ -3018,7 +3142,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case EFFECT_MEMORY: if ((state & (IN_ALT | IN_NOT | IN_REPEAT)) != 0) { - BIT_STATUS_ON_AT(env->backtrack_mem, en->regnum); + BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */ } /* fall */ @@ -3073,7 +3197,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB); if (r < 0) return r; - if (r > 0) return REGERR_INVALID_LOOK_BEHIND_PATTERN; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); if (r != 0) return r; r = setup_tree(an->target, reg, state, env); @@ -3085,7 +3209,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) r = 
check_type_tree(an->target, ALLOWED_TYPE_IN_LB, ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); if (r < 0) return r; - if (r > 0) return REGERR_INVALID_LOOK_BEHIND_PATTERN; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); if (r != 0) return r; r = setup_tree(an->target, reg, (state | IN_NOT), env); @@ -3104,18 +3228,21 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* set skip map for Boyer-Moor search */ static int -set_bm_skip(UChar* s, UChar* end, RegCharEncoding enc, int ignore_case, +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, int ignore_case, UChar skip[], int** int_skip) { int i, len; + UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; len = end - s; - if (len < REG_CHAR_TABLE_SIZE) { - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) skip[i] = len; + if (len < ONIG_CHAR_TABLE_SIZE) { + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; if (ignore_case) { - for (i = 0; i < len - 1; i++) - skip[TOLOWER(enc, s[i])] = len - 1 - i; + for (i = 0; i < len - 1; i++) { + ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf); + skip[*lowbuf] = len - 1 - i; + } } else { for (i = 0; i < len - 1; i++) @@ -3124,14 +3251,16 @@ set_bm_skip(UChar* s, UChar* end, RegCharEncoding enc, int ignore_case, } else { if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * REG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return REGERR_MEMORY; + *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; } - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; if (ignore_case) { - for (i = 0; i < len - 1; i++) - (*int_skip)[TOLOWER(enc, s[i])] = len - 1 - i; + for (i = 0; i < len - 1; i++) { + ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf); + (*int_skip)[*lowbuf] = len - 1 - i; + } } else { for (i = 0; i < len - 1; i++) @@ -3144,16 +3273,15 @@ set_bm_skip(UChar* s, UChar* end, 
RegCharEncoding enc, int ignore_case, #define OPT_EXACT_MAXLEN 24 typedef struct { - RegDistance min; /* min byte length */ - RegDistance max; /* max byte length */ + OnigDistance min; /* min byte length */ + OnigDistance max; /* max byte length */ } MinMaxLen; typedef struct { MinMaxLen mmd; BitStatusType backrefed_status; - RegCharEncoding enc; - RegOptionType options; - RegTransTableType transtable; + OnigEncoding enc; + OnigOptionType options; ScanEnv* scan_env; } OptEnv; @@ -3177,7 +3305,7 @@ typedef struct { OptAncInfo anc; int value; /* weighted value */ - UChar map[REG_CHAR_TABLE_SIZE]; + UChar map[ONIG_CHAR_TABLE_SIZE]; } OptMapInfo; typedef struct { @@ -3230,7 +3358,7 @@ distance_value(MinMaxLen* mm) int d; - if (mm->max == INFINITE_DISTANCE) return 0; + if (mm->max == ONIG_INFINITE_DISTANCE) return 0; d = mm->max - mm->min; if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) @@ -3265,7 +3393,7 @@ is_equal_mml(MinMaxLen* a, MinMaxLen* b) static void -set_mml(MinMaxLen* mml, RegDistance min, RegDistance max) +set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) { mml->min = min; mml->max = max; @@ -3292,7 +3420,7 @@ add_mml(MinMaxLen* to, MinMaxLen* from) } static void -add_len_mml(MinMaxLen* to, RegDistance len) +add_len_mml(MinMaxLen* to, OnigDistance len) { to->min = distance_add(to->min, len); to->max = distance_add(to->max, len); @@ -3326,7 +3454,7 @@ copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) static void concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, - RegDistance left_len, RegDistance right_len) + OnigDistance left_len, OnigDistance right_len) { clear_opt_anc_info(to); @@ -3433,7 +3561,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add) static void concat_opt_exact_info_str(OptExactInfo* to, - UChar* s, UChar* end, int raw, RegCharEncoding code) + UChar* s, UChar* end, int raw, OnigEncoding enc) { int i, j, len; UChar *p; @@ -3443,7 +3571,8 @@ concat_opt_exact_info_str(OptExactInfo* to, to->s[i++] = 
*p++; } else { - len = mblen(code, *p); + len = enc_len(enc, *p); + if (i + len > OPT_EXACT_MAXLEN) break; for (j = 0; j < len; j++) to->s[i++] = *p++; } @@ -3469,7 +3598,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) for (i = 0; i < to->len && i < add->len; ) { if (to->s[i] != add->s[i]) break; - len = mblen(env->enc, to->s[i]); + len = enc_len(env->enc, to->s[i]); for (j = 1; j < len; j++) { if (to->s[i+j] != add->s[i+j]) break; @@ -3508,7 +3637,7 @@ clear_opt_map_info(OptMapInfo* map) clear_mml(&map->mmd); clear_opt_anc_info(&map->anc); map->value = 0; - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) map->map[i] = 0; } @@ -3528,19 +3657,23 @@ add_char_opt_map_info(OptMapInfo* map, int c) } static void -add_char_amb_opt_map_info(OptMapInfo* map, int c, RegCharEncoding enc) +add_char_amb_opt_map_info(OptMapInfo* map, int c, OnigEncoding enc) { - int i, t; + UChar x, low[ONIGENC_MBC_TO_LOWER_MAXLEN]; add_char_opt_map_info(map, c); - t = TOLOWER(enc, c); - if (t != c) { - add_char_opt_map_info(map, t); + + x = (UChar )c; + ONIGENC_MBC_TO_LOWER(enc, &x, low); + if (*low != x) { + add_char_opt_map_info(map, (int )(*low)); } else { - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) { - t = TOLOWER(enc, i); - if (t == c) add_char_opt_map_info(map, i); + int i; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + x = (UChar )i; + ONIGENC_MBC_TO_LOWER(enc, &x, low); + if ((int )(*low) == c) add_char_opt_map_info(map, i); } } } @@ -3592,7 +3725,7 @@ alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add) alt_merge_mml(&to->mmd, &add->mmd); val = 0; - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) { + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { if (add->map[i]) to->map[i] = 1; @@ -3645,9 +3778,8 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add) } if (add->map.value > 0 && to->len.max == 0) { - concat_opt_anc_info(&tanc, &to->anc, &add->map.anc, - to->len.max, add->len.max); - 
copy_opt_anc_info(&add->map.anc, &tanc); + if (add->map.mmd.max == 0) + add->map.anc.left_anchor |= to->anc.left_anchor; } exb_reach = to->exb.reach_end; @@ -3764,8 +3896,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } else { for (p = sn->s; p < sn->end; ) { - len = mblen(env->enc, *p); - if (len == 1 && IS_AMBIGUITY_CHAR(env->enc, *p)) { + len = enc_len(env->enc, *p); + if (len == 1 && ONIGENC_IS_MBC_CASE_AMBIG(env->enc, p)) { break; } p += len; @@ -3790,7 +3922,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (slen > 0) { if (p == sn->s) - add_char_amb_opt_map_info(&opt->map, *(sn->s), env->transtable); + add_char_amb_opt_map_info(&opt->map, *(sn->s), env->enc); else add_char_opt_map_info(&opt->map, *(sn->s)); } @@ -3805,11 +3937,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case N_CCLASS: { - int i, z, len, found; + int i, z, len, found, mb_found; CClassNode* cc = &(NCCLASS(node)); /* no need to check ignore case. (setted in setup_tree()) */ - found = 0; + found = mb_found = 0; for (i = 0; i < SINGLE_BYTE_SIZE; i++) { z = BITSET_AT(cc->bs, i); if ((z && !cc->not) || (!z && cc->not)) { @@ -3818,21 +3950,30 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } } - if (IS_NOT_NULL(cc->mbuf)) { + if (IS_NULL(cc->mbuf)) { + if (cc->not) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + add_char_opt_map_info(&opt->map, i); + } + mb_found = 1; + } + } + else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT((BitSetRef )(cc->mbuf->p), i)) { - found = 1; + z = ONIGENC_IS_MBC_HEAD(env->enc, i); + if (z) { + mb_found = 1; add_char_opt_map_info(&opt->map, i); } } } - if (found) { - if (IS_NULL(cc->mbuf)) - len = bitset_mbmaxlen(cc->bs, cc->not, env->enc); - else - len = mbmaxlen_dist(env->enc); - + if (mb_found) { + len = ONIGENC_MBC_MAXLEN_DIST(env->enc); + set_mml(&opt->len, 1, len); + } + else if (found) { + len = 1; set_mml(&opt->len, 1, len); } } @@ -3843,15 +3984,19 @@ 
optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) int c; int len, min, max; - min = mbmaxlen_dist(env->enc); + min = ONIGENC_MBC_MAXLEN_DIST(env->enc); max = 0; +#define IS_WORD_HEAD_BYTE(enc,b) \ + (ONIGENC_IS_MBC_ASCII(&b) ? ONIGENC_IS_CODE_WORD(enc,((OnigCodePoint )b)) \ + : ONIGENC_IS_MBC_HEAD(enc,b)) + switch (NCTYPE(node).type) { case CTYPE_WORD: for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_WORD_HEAD(env->enc, c)) { + if (IS_WORD_HEAD_BYTE(env->enc, c)) { add_char_opt_map_info(&opt->map, c); - len = mblen(env->enc, c); + len = enc_len(env->enc, c); if (len < min) min = len; if (len > max) max = len; } @@ -3860,36 +4005,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case CTYPE_NOT_WORD: for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! IS_WORD_HEAD(env->enc, c)) { + if (! IS_WORD_HEAD_BYTE(env->enc, c)) { add_char_opt_map_info(&opt->map, c); - len = mblen(env->enc, c); + len = enc_len(env->enc, c); if (len < min) min = len; if (len > max) max = len; } } break; - -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_SB: - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_SB_WORD(env->enc, c)) { - add_char_opt_map_info(&opt->map, c); - } - } - min = max = 1; - break; - - case CTYPE_WORD_MB: - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_MB_WORD(env->enc, c)) { - add_char_opt_map_info(&opt->map, c); - len = mblen(env->enc, c); - if (len < min) min = len; - if (len > max) max = len; - } - } - break; -#endif } set_mml(&opt->len, min, max); @@ -3898,7 +4021,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case N_ANYCHAR: { - RegDistance len = mbmaxlen_dist(env->enc); + OnigDistance len = ONIGENC_MBC_MAXLEN_DIST(env->enc); set_mml(&opt->len, 1, len); } break; @@ -3944,12 +4067,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) { int i; int* backs; - RegDistance min, max, tmin, tmax; + OnigDistance min, max, tmin, tmax; Node** nodes = SCANENV_MEM_NODES(env->scan_env); BackrefNode* br = &(NBACKREF(node)); 
if (br->state & NST_RECURSION) { - set_mml(&opt->len, 0, INFINITE_DISTANCE); + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); break; } backs = BACKREFS_P(br); @@ -3972,9 +4095,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) #ifdef USE_SUBEXP_CALL case N_CALL: if (IS_CALL_RECURSION(&(NCALL(node)))) - set_mml(&opt->len, 0, INFINITE_DISTANCE); + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); else { + OnigOptionType save = env->options; + env->options = NEFFECT(NCALL(node).target).option; r = optimize_node_left(NCALL(node).target, opt, env); + env->options = save; } break; #endif @@ -3982,7 +4108,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case N_QUALIFIER: { int i; - RegDistance min, max; + OnigDistance min, max; NodeOptInfo nopt; QualifierNode* qn = &(NQUALIFIER(node)); @@ -4024,7 +4150,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) min = distance_multiply(nopt.len.min, qn->lower); if (IS_REPEAT_INFINITE(qn->upper)) - max = (nopt.len.max > 0 ? INFINITE_DISTANCE : 0); + max = (nopt.len.max > 0 ? 
ONIG_INFINITE_DISTANCE : 0); else max = distance_multiply(nopt.len.max, qn->upper); @@ -4039,7 +4165,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) switch (en->type) { case EFFECT_OPTION: { - RegOptionType save = env->options; + OnigOptionType save = env->options; env->options = en->option; r = optimize_node_left(en->target, opt, env); @@ -4051,10 +4177,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) #ifdef USE_SUBEXP_CALL en->opt_count++; if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { - RegDistance min, max; + OnigDistance min, max; min = 0; - max = INFINITE_DISTANCE; + max = ONIG_INFINITE_DISTANCE; if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len; if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len; set_mml(&opt->len, min, max); @@ -4079,11 +4205,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) break; default: -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG fprintf(stderr, "optimize_node_left: undefined node type %d\n", NTYPE(node)); #endif - r = REGERR_TYPE_BUG; + r = ONIGERR_TYPE_BUG; break; } @@ -4097,22 +4223,32 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->len == 0) return 0; - reg->exact = regex_strdup(e->s, e->s + e->len); - if (IS_NULL(reg->exact)) return REGERR_MEMORY; + reg->exact = onig_strdup(e->s, e->s + e->len); + CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); reg->exact_end = reg->exact + e->len; if (e->ignore_case) { - UChar *p; - int len; - for (p = reg->exact; p < reg->exact_end; ) { - len = mblen(reg->enc, *p); - if (len == 1) { - *p = TOLOWER(reg->enc, *p); + UChar buf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + int len, low_len, i, j, alloc_size; + + alloc_size = e->len; + i = j = 0; + while (i < e->len) { + low_len = ONIGENC_MBC_TO_LOWER(reg->enc, &(e->s[i]), buf); + len = enc_len(reg->enc, e->s[i]); + if (low_len > alloc_size - i) { + reg->exact = xrealloc(reg->exact, alloc_size * 2); + CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + alloc_size *= 2; } - p += len; + + 
xmemcpy(&(reg->exact[j]), buf, low_len); + i += len; + j += low_len; } - reg->optimize = REG_OPTIMIZE_EXACT_IC; + reg->exact_end = reg->exact + j; + reg->optimize = ONIG_OPTIMIZE_EXACT_IC; } else { int allow_reverse; @@ -4121,7 +4257,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) allow_reverse = 1; else allow_reverse = - regex_is_allow_reverse_match(reg->enc, reg->exact, reg->exact_end); + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, 0, @@ -4129,17 +4265,17 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (r) return r; reg->optimize = (allow_reverse != 0 - ? REG_OPTIMIZE_EXACT_BM : REG_OPTIMIZE_EXACT_BM_NOT_REV); + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); } else { - reg->optimize = REG_OPTIMIZE_EXACT; + reg->optimize = ONIG_OPTIMIZE_EXACT; } } reg->dmin = e->mmd.min; reg->dmax = e->mmd.max; - if (reg->dmin != INFINITE_DISTANCE) { + if (reg->dmin != ONIG_INFINITE_DISTANCE) { reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); } @@ -4151,14 +4287,14 @@ set_optimize_map_info(regex_t* reg, OptMapInfo* m) { int i; - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) reg->map[i] = m->map[i]; - reg->optimize = REG_OPTIMIZE_MAP; + reg->optimize = ONIG_OPTIMIZE_MAP; reg->dmin = m->mmd.min; reg->dmax = m->mmd.max; - if (reg->dmin != INFINITE_DISTANCE) { + if (reg->dmin != ONIG_INFINITE_DISTANCE) { reg->threshold_len = reg->dmin + 1; } } @@ -4170,7 +4306,7 @@ set_sub_anchor(regex_t* reg, OptAncInfo* anc) reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; } -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG static void print_optimize_info(FILE* f, regex_t* reg); #endif @@ -4222,7 +4358,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; } -#if defined(REG_DEBUG_COMPILE) || 
defined(REG_DEBUG_MATCH) +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) print_optimize_info(stderr, reg); #endif return r; @@ -4231,7 +4367,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) static void clear_optimize_info(regex_t* reg) { - reg->optimize = REG_OPTIMIZE_NONE; + reg->optimize = ONIG_OPTIMIZE_NONE; reg->anchor = 0; reg->anchor_dmin = 0; reg->anchor_dmax = 0; @@ -4244,19 +4380,19 @@ clear_optimize_info(regex_t* reg) } } -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG static void -print_distance_range(FILE* f, RegDistance a, RegDistance b) +print_distance_range(FILE* f, OnigDistance a, OnigDistance b) { - if (a == INFINITE_DISTANCE) + if (a == ONIG_INFINITE_DISTANCE) fputs("inf", f); else fprintf(f, "(%u)", a); fputs("-", f); - if (b == INFINITE_DISTANCE) + if (b == ONIG_INFINITE_DISTANCE) fputs("inf", f); else fprintf(f, "(%u)", b); @@ -4337,58 +4473,58 @@ print_optimize_info(FILE* f, regex_t* reg) } fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); } - else if (reg->optimize & REG_OPTIMIZE_MAP) { + else if (reg->optimize & ONIG_OPTIMIZE_MAP) { int i, n = 0; - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) if (reg->map[i]) n++; fprintf(f, "map: n=%d\n", n); if (n > 0) { fputc('[', f); - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) - if (reg->map[i] && mblen(reg->enc, i) == 1 && - IS_CODE_PRINT(reg->enc, i)) + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + if (reg->map[i] && enc_len(reg->enc, i) == 1 && + ONIGENC_IS_CODE_PRINT(reg->enc, i)) fputc(i, f); fprintf(f, "]\n"); } } } -#endif /* REG_DEBUG */ +#endif /* ONIG_DEBUG */ static void -regex_free_body(regex_t* reg) +onig_free_body(regex_t* reg) { if (IS_NOT_NULL(reg->p)) xfree(reg->p); if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if 
(IS_NOT_NULL(reg->chain)) regex_free(reg->chain); + if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); -#ifdef USE_NAMED_SUBEXP - regex_names_free(reg); +#ifdef USE_NAMED_GROUP + onig_names_free(reg); #endif } extern void -regex_free(regex_t* reg) +onig_free(regex_t* reg) { if (IS_NOT_NULL(reg)) { - regex_free_body(reg); + onig_free_body(reg); xfree(reg); } } #define REGEX_TRANSFER(to,from) do {\ - (to)->state = REG_STATE_MODIFY;\ - regex_free_body(to);\ + (to)->state = ONIG_STATE_MODIFY;\ + onig_free_body(to);\ xmemcpy(to, from, sizeof(regex_t));\ xfree(from);\ } while (0) static void -regex_transfer(regex_t* to, regex_t* from) +onig_transfer(regex_t* to, regex_t* from) { THREAD_ATOMIC_START; REGEX_TRANSFER(to, from); @@ -4402,7 +4538,7 @@ regex_transfer(regex_t* to, regex_t* from) } while (0) static void -regex_chain_link_add(regex_t* to, regex_t* add) +onig_chain_link_add(regex_t* to, regex_t* add) { THREAD_ATOMIC_START; REGEX_CHAIN_HEAD(to); @@ -4411,7 +4547,7 @@ regex_chain_link_add(regex_t* to, regex_t* add) } extern void -regex_chain_reduce(regex_t* reg) +onig_chain_reduce(regex_t* reg) { regex_t *head, *prev; @@ -4419,7 +4555,7 @@ regex_chain_reduce(regex_t* reg) prev = reg; head = prev->chain; if (IS_NOT_NULL(head)) { - reg->state = REG_STATE_MODIFY; + reg->state = ONIG_STATE_MODIFY; while (IS_NOT_NULL(head->chain)) { prev = head; head = head->chain; @@ -4432,37 +4568,36 @@ regex_chain_reduce(regex_t* reg) #if 0 extern int -regex_clone(regex_t** to, regex_t* from) +onig_clone(regex_t** to, regex_t* from) { int r, size; regex_t* reg; - if (REG_STATE(from) == REG_STATE_NORMAL) { + if (ONIG_STATE(from) == ONIG_STATE_NORMAL) { from->state++; /* increment as search counter */ if (IS_NOT_NULL(from->chain)) { - regex_chain_reduce(from); + onig_chain_reduce(from); from->state++; } } else { int n = 0; - while (REG_STATE(from) < REG_STATE_NORMAL) { + while (ONIG_STATE(from) < ONIG_STATE_NORMAL) { if (++n > THREAD_PASS_LIMIT_COUNT) - return 
REGERR_OVER_THREAD_PASS_LIMIT_COUNT; + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } from->state++; /* increment as search counter */ } - r = regex_alloc_init(&reg, REG_OPTION_NONE, RegDefaultCharEncoding, - REG_TRANSTABLE_USE_DEFAULT); + r = onig_alloc_init(&reg, ONIG_OPTION_NONE, from->enc, ONIG_SYNTAX_DEFAULT); if (r != 0) { from->state--; return r; } - xmemcpy(reg, from, sizeof(regex_t)); - reg->state = REG_STATE_NORMAL; + xmemcpy(reg, from, sizeof(onig_t)); + reg->state = ONIG_STATE_NORMAL; reg->chain = (regex_t* )NULL; if (from->p) { @@ -4479,20 +4614,20 @@ regex_clone(regex_t** to, regex_t* from) } if (from->int_map) { - size = sizeof(int) * REG_CHAR_TABLE_SIZE; + size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; reg->int_map = (int* )xmalloc(size); if (IS_NULL(reg->int_map)) goto mem_error; xmemcpy(reg->int_map, from->int_map, size); } if (from->int_map_backward) { - size = sizeof(int) * REG_CHAR_TABLE_SIZE; + size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; reg->int_map_backward = (int* )xmalloc(size); if (IS_NULL(reg->int_map_backward)) goto mem_error; xmemcpy(reg->int_map_backward, from->int_map_backward, size); } -#ifdef USE_NAMED_SUBEXP +#ifdef USE_NAMED_GROUP reg->name_table = names_clone(from); /* names_clone is not implemented */ #endif @@ -4502,18 +4637,20 @@ regex_clone(regex_t** to, regex_t* from) mem_error: from->state--; - return REGERR_MEMORY; + return ONIGERR_MEMORY; } #endif -#ifdef REG_DEBUG -static void print_tree P_((FILE* f, Node* node)); +#ifdef ONIG_DEBUG static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); #endif +#ifdef ONIG_DEBUG_PARSE_TREE +static void print_tree P_((FILE* f, Node* node)); +#endif extern int -regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, - RegErrorInfo* einfo) +onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, + OnigErrorInfo* einfo) { #define COMPILE_INIT_SIZE 20 @@ -4524,13 +4661,13 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, UnsetAddrList uslist; 
#endif - reg->state = REG_STATE_COMPILING; + reg->state = ONIG_STATE_COMPILING; if (reg->alloc == 0) { init_size = (pattern_end - pattern) * 2; if (init_size <= 0) init_size = COMPILE_INIT_SIZE; r = BBUF_INIT(reg, init_size); - if (r) goto end; + if (r != 0) goto end; } else reg->used = 0; @@ -4539,26 +4676,40 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, reg->num_repeat = 0; reg->num_null_check = 0; reg->repeat_range_alloc = 0; - reg->repeat_range = (RegRepeatRange* )NULL; + reg->repeat_range = (OnigRepeatRange* )NULL; + + r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); + if (r != 0) goto err; + +#ifdef USE_NAMED_GROUP + /* mixed use named group and no-named group */ + if (scan_env.num_named > 0 && + IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + if (scan_env.num_named != scan_env.num_mem) + r = disable_noname_group_capture(&root, reg, &scan_env); + else + r = numbered_ref_check(root); - r = regex_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); - if (r) goto err; + if (r != 0) goto err; + } +#endif -#ifdef REG_DEBUG_PARSE_TREE +#ifdef ONIG_DEBUG_PARSE_TREE print_tree(stderr, root); #endif #ifdef USE_SUBEXP_CALL if (scan_env.num_call > 0) { r = unset_addr_list_init(&uslist, scan_env.num_call); - if (r) goto err; + if (r != 0) goto err; scan_env.unset_addr_list = &uslist; r = setup_subexp_call(root, &scan_env); - if (r) goto err_unset; + if (r != 0) goto err_unset; r = subexp_recursive_check_trav(root, &scan_env); - if (r < 0) goto err_unset; + if (r < 0) goto err_unset; r = subexp_inf_recursive_check_trav(root, &scan_env); - if (r) goto err_unset; + if (r != 0) goto err_unset; reg->num_call = scan_env.num_call; } @@ -4567,14 +4718,22 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, #endif r = setup_tree(root, reg, 0, &scan_env); - if (r) goto err_unset; + if (r != 0) goto err_unset; - reg->backtrack_mem 
= scan_env.backtrack_mem; + reg->capture_history = scan_env.capture_history; + reg->bt_mem_start = scan_env.bt_mem_start; + reg->bt_mem_start |= reg->capture_history; + if (IS_FIND_CONDITION(reg->options)) + BIT_STATUS_ON_ALL(reg->bt_mem_end); + else { + reg->bt_mem_end = scan_env.bt_mem_end; + reg->bt_mem_end |= reg->capture_history; + } clear_optimize_info(reg); -#ifndef REG_DONT_OPTIMIZE +#ifndef ONIG_DONT_OPTIMIZE r = set_optimize_info_from_tree(root, reg, &scan_env); - if (r) goto err_unset; + if (r != 0) goto err_unset; #endif if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { @@ -4593,10 +4752,10 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, } #endif - if ((reg->num_repeat != 0) || IS_FIND_CONDITION(reg->options)) + if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) reg->stack_pop_level = STACK_POP_LEVEL_ALL; else { - if (reg->backtrack_mem != 0) + if (reg->bt_mem_start != 0) reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; else reg->stack_pop_level = STACK_POP_LEVEL_FREE; @@ -4607,17 +4766,17 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, unset_addr_list_end(&uslist); } #endif - regex_node_free(root); + onig_node_free(root); -#ifdef REG_DEBUG_COMPILE -#ifdef USE_NAMED_SUBEXP - regex_print_names(stderr, reg); +#ifdef ONIG_DEBUG_COMPILE +#ifdef USE_NAMED_GROUP + onig_print_names(stderr, reg); #endif print_compiled_byte_code_list(stderr, reg); #endif end: - reg->state = REG_STATE_NORMAL; + reg->state = ONIG_STATE_NORMAL; return r; err_unset: @@ -4634,51 +4793,54 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, } } - if (IS_NOT_NULL(root)) regex_node_free(root); + if (IS_NOT_NULL(root)) onig_node_free(root); if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) xfree(scan_env.mem_nodes_dynamic); return r; } extern int -regex_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end, - RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, - RegErrorInfo* einfo) +onig_recompile(regex_t* reg, UChar* 
pattern, UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) { int r; regex_t *new_reg; - r = regex_new(&new_reg, pattern, pattern_end, option, code, syntax, einfo); + r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); if (r) return r; - if (REG_STATE(reg) == REG_STATE_NORMAL) { - regex_transfer(reg, new_reg); + if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_transfer(reg, new_reg); } else { - regex_chain_link_add(reg, new_reg); + onig_chain_link_add(reg, new_reg); } return 0; } -static int regex_inited = 0; +static int onig_inited = 0; extern int -regex_alloc_init(regex_t** reg, RegOptionType option, RegCharEncoding enc, - RegSyntaxType* syntax) +onig_alloc_init(regex_t** reg, OnigOptionType option, OnigEncoding enc, + OnigSyntaxType* syntax) { - if (! regex_inited) - regex_init(); + if (! onig_inited) + onig_init(); + + if (ONIGENC_IS_UNDEF(enc)) + return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; *reg = (regex_t* )xmalloc(sizeof(regex_t)); - if (IS_NULL(*reg)) return REGERR_MEMORY; + if (IS_NULL(*reg)) return ONIGERR_MEMORY; - if ((option & REG_OPTION_NEGATE_SINGLELINE) != 0) { + if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { option |= syntax->options; - option &= ~REG_OPTION_SINGLELINE; + option &= ~ONIG_OPTION_SINGLELINE; } else option |= syntax->options; - (*reg)->state = REG_STATE_NORMAL; + (*reg)->state = ONIG_STATE_NORMAL; (*reg)->enc = enc; (*reg)->options = option; (*reg)->syntax = syntax; @@ -4697,82 +4859,65 @@ regex_alloc_init(regex_t** reg, RegOptionType option, RegCharEncoding enc, } extern int -regex_new(regex_t** reg, UChar* pattern, UChar* pattern_end, - RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, - RegErrorInfo* einfo) +onig_new(regex_t** reg, UChar* pattern, UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) { int r; if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; - r = 
regex_alloc_init(reg, option, code, syntax); + r = onig_alloc_init(reg, option, enc, syntax); if (r) return r; - r = regex_compile(*reg, pattern, pattern_end, einfo); + r = onig_compile(*reg, pattern, pattern_end, einfo); if (r) { - regex_free(*reg); + onig_free(*reg); *reg = NULL; } return r; } -extern void -regex_set_default_trans_table(UChar* table) +extern int +onig_init() { - int i; + if (onig_inited != 0) + return 0; - if (table && table != DefaultTransTable) { - DefaultTransTable = table; + onig_inited = 1; - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) - AmbiguityTable[i] = 0; + THREAD_ATOMIC_START; - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) { - AmbiguityTable[table[i]]++; - if (table[i] != i) - AmbiguityTable[i] += 2; - } - } -} + onigenc_init(); + onigenc_set_default_caseconv_table((UChar* )0); -extern int -regex_init() -{ - regex_inited = 1; - - THREAD_ATOMIC_START; -#ifdef DEFAULT_TRANSTABLE_EXIST - if (! DefaultTransTable) /* check re_set_casetable() called already. */ - regex_set_default_trans_table(DTT); +#ifdef ONIG_DEBUG_STATISTICS + onig_statistics_init(); #endif -#ifdef REG_DEBUG_STATISTICS - regex_statistics_init(); -#endif THREAD_ATOMIC_END; - return 0; } extern int -regex_end() +onig_end() { -#ifdef REG_DEBUG_STATISTICS - regex_print_statistics(stderr); +#ifdef ONIG_DEBUG_STATISTICS + onig_print_statistics(stderr); #endif #ifdef USE_RECYCLE_NODE - regex_free_node_list(); + onig_free_node_list(); #endif - regex_inited = 0; + onig_inited = 0; return 0; } -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG -RegOpInfoType RegOpInfo[] = { +OnigOpInfoType OnigOpInfo[] = { { OP_FINISH, "finish", ARG_NON }, { OP_END, "end", ARG_NON }, { OP_EXACT1, "exact1", ARG_SPECIAL }, @@ -4796,8 +4941,11 @@ RegOpInfoType RegOpInfo[] = { { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { 
OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, { OP_WORD, "word", ARG_NON }, { OP_NOT_WORD, "not-word", ARG_NON }, { OP_WORD_SB, "word-sb", ARG_NON }, @@ -4816,7 +4964,9 @@ RegOpInfoType RegOpInfo[] = { { OP_BACKREF2, "backref2", ARG_NON }, { OP_BACKREF3, "backref3", ARG_NON }, { OP_BACKREFN, "backrefn", ARG_MEMNUM }, + { OP_BACKREFN_IC, "backrefn-ic", ARG_MEMNUM }, { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL }, { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, @@ -4837,6 +4987,8 @@ RegOpInfoType RegOpInfo[] = { { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, { OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM }, { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, { OP_PUSH_POS, "push-pos", ARG_NON }, { OP_POP_POS, "pop-pos", ARG_NON }, { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, @@ -4856,9 +5008,9 @@ op2name(int opcode) { int i; - for (i = 0; RegOpInfo[i].opcode >= 0; i++) { - if (opcode == RegOpInfo[i].opcode) - return RegOpInfo[i].name; + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].name; } return ""; } @@ -4868,9 +5020,9 @@ op2arg_type(int opcode) { int i; - for (i = 0; RegOpInfo[i].opcode >= 0; i++) { - if (opcode == RegOpInfo[i].opcode) - return RegOpInfo[i].arg_type; + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].arg_type; } return ARG_SPECIAL; } @@ -4899,13 +5051,13 @@ p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) } extern void -regex_print_compiled_byte_code(FILE* f, 
UChar* bp, UChar** nextp) +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp) { int i, n, arg_type; RelAddrType addr; LengthType len; MemNumType mem; - WCINT wc; + OnigCodePoint code; UChar *q; fprintf(f, "[%s", op2name(*bp)); @@ -4935,7 +5087,7 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp) break; case ARG_OPTION: { - RegOptionType option = *((RegOptionType* )bp); + OnigOptionType option = *((OnigOptionType* )bp); bp += SIZE_OPTION; fprintf(f, ":%d", option); } @@ -4946,6 +5098,7 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp) switch (*bp++) { case OP_EXACT1: case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: p_string(f, 1, bp++); break; case OP_EXACT2: p_string(f, 2, bp); bp += 2; break; @@ -5014,12 +5167,12 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp) case OP_CCLASS_MB_NOT: GET_LENGTH_INC(len, bp); q = bp; -#ifndef UNALIGNED_WORD_ACCESS +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS ALIGNMENT_RIGHT(q); #endif - GET_WCINT(wc, q); + GET_CODE_POINT(code, q); bp += len; - fprintf(f, ":%d:%d", (int )wc, len); + fprintf(f, ":%d:%d", (int )code, len); break; case OP_CCLASS_MIX: @@ -5028,15 +5181,16 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp) bp += SIZE_BITSET; GET_LENGTH_INC(len, bp); q = bp; -#ifndef UNALIGNED_WORD_ACCESS +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS ALIGNMENT_RIGHT(q); #endif - GET_WCINT(wc, q); + GET_CODE_POINT(code, q); bp += len; - fprintf(f, ":%d:%d:%d", n, (int )wc, len); + fprintf(f, ":%d:%d:%d", n, (int )code, len); break; case OP_BACKREF_MULTI: + case OP_BACKREF_MULTI_IC: fputs(" ", f); GET_LENGTH_INC(len, bp); for (i = 0; i < len; i++) { @@ -5078,7 +5232,7 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp) break; default: - fprintf(stderr, "regex_print_compiled_byte_code: undefined code %d\n", + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); } } @@ -5104,7 +5258,7 @@ 
print_compiled_byte_code_list(FILE* f, regex_t* reg) else fputs(" ", f); } - regex_print_compiled_byte_code(f, bp, &bp); + onig_print_compiled_byte_code(f, bp, &bp); } fprintf(f, "\n"); @@ -5145,7 +5299,13 @@ print_indent_tree(FILE* f, Node* node, int indent) case N_STRING: fprintf(f, "", (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node); - for (p = NSTRING(node).s; p < NSTRING(node).end; p++) fputc(*p, f); + for (p = NSTRING(node).s; p < NSTRING(node).end; p++) { + if (*p >= 0x20 && *p < 0x7f) + fputc(*p, f); + else { + fprintf(f, " 0x%02x", *p); + } + } break; case N_CCLASS: @@ -5171,10 +5331,6 @@ print_indent_tree(FILE* f, Node* node, int indent) switch (NCTYPE(node).type) { case CTYPE_WORD: fputs("word", f); break; case CTYPE_NOT_WORD: fputs("not word", f); break; -#ifdef USE_SBMB_CLASS - case CTYPE_WORD_SB: fputs("word-sb", f); break; - case CTYPE_WORD_MB: fputs("word-mb", f); break; -#endif default: fprintf(f, "ERROR: undefined ctype.\n"); exit(0); @@ -5273,10 +5429,12 @@ print_indent_tree(FILE* f, Node* node, int indent) fprintf(f, "\n"); fflush(f); } +#endif /* ONIG_DEBUG */ +#ifdef ONIG_DEBUG_PARSE_TREE static void print_tree(FILE* f, Node* node) { print_indent_tree(f, node, 0); } -#endif /* REG_DEBUG */ +#endif diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c new file mode 100644 index 0000000000..7e9c640bb6 --- /dev/null +++ b/ext/mbstring/oniguruma/regenc.c @@ -0,0 +1,586 @@ +/********************************************************************** + + regenc.c - Oniguruma (regular expression library) + + Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp) + +**********************************************************************/ +#include "regenc.h" + +OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; + +extern int +onigenc_init() +{ + return 0; +} + +extern OnigEncoding +onigenc_get_default_encoding() +{ + return OnigEncDefaultCharEncoding; +} + +extern int +onigenc_set_default_encoding(OnigEncoding 
enc) +{ + OnigEncDefaultCharEncoding = enc; + return 0; +} + +extern UChar* +onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + if (p < s) { + p += enc_len(enc, *p); + } + return p; +} + +extern UChar* +onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, + UChar* start, UChar* s, UChar** prev) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + + if (p < s) { + if (prev) *prev = p; + p += enc_len(enc, *p); + } + else { + if (prev) *prev = (UChar* )NULL; /* Sorry */ + } + return p; +} + +extern UChar* +onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s) +{ + if (s <= start) + return (UChar* )NULL; + + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); +} + +extern UChar* +onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n) +{ + while (ONIG_IS_NOT_NULL(s) && n-- > 0) { + if (s <= start) + return (UChar* )NULL; + + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); + } + return s; +} + + +#ifndef ONIG_RUBY_M17N + +#ifndef NOT_RUBY +#define USE_APPLICATION_TO_LOWER_CASE_TABLE +#endif + +UChar* OnigEncAsciiToLowerCaseTable = (UChar* )0; + +#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE +static UChar BuiltInAsciiToLowerCaseTable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', 
'\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */ + +unsigned short OnigEncAsciiCtypeTable[256] = { + 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, + 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004, + 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, + 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, + 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, + 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, + 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, + 0x1c58, 0x1c58, 
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, + 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51, + 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, + 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, + 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0, + 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871, + 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, + 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, + 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +extern void +onigenc_set_default_caseconv_table(UChar* table) +{ + if (table == (UChar* )0) { +#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE + table = BuiltInAsciiToLowerCaseTable; +#else + return ; +#endif + } + + if (table != OnigEncAsciiToLowerCaseTable) { + OnigEncAsciiToLowerCaseTable = table; + } +} + +extern UChar* 
+onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s) +{ + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); +} + +extern int +onigenc_nothing_get_all_fold_match_code(OnigCodePoint** codes) +{ + return 0; +} + +extern int +onigenc_nothing_get_fold_match_info(UChar* p, UChar* end, + OnigEncFoldMatchInfo** info) +{ + return -1; +} + +extern int +onigenc_nothing_get_ctype_code_range(int ctype, int* nsb, int* nmb, + OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]) +{ + return -1; +} + +/* for single byte encodings */ +extern int +onigenc_ascii_mbc_to_lower(UChar* p, UChar* lower) +{ + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + return 1; /* return byte length of converted char to lower */ +} + +extern int +onigenc_ascii_mbc_is_case_ambig(UChar* p) +{ + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); +} + +extern OnigCodePoint +onigenc_single_byte_mbc_to_code(UChar* p, UChar* end) +{ + return (OnigCodePoint )(*p); +} + +extern int +onigenc_single_byte_code_to_mbclen(OnigCodePoint code) +{ + return 1; +} + +extern int +onigenc_single_byte_code_to_mbc_first(OnigCodePoint code) +{ + return (code & 0xff); +} + +extern int +onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + *buf = (code & 0xff); + return 1; +} + +extern UChar* +onigenc_single_byte_left_adjust_char_head(UChar* start, UChar* s) +{ + return s; +} + +extern int +onigenc_single_byte_is_allowed_reverse_match(UChar* s, UChar* end) +{ + return TRUE; +} + +extern OnigCodePoint +onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + c = *p++; + len = enc_len(enc, c); + n = c; + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +extern int +onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower) +{ + int len; + + if (ONIGENC_IS_MBC_ASCII(p)) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + return 1; + } + else { + len = 
enc_len(enc, *p); + if (lower != p) { + /* memcpy(lower, p, len); */ + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + return len; /* return byte length of converted to lower char */ + } +} + +extern int +onigenc_mbn_mbc_is_case_ambig(UChar* p) +{ + if (ONIGENC_IS_MBC_ASCII(p)) + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); + + return FALSE; +} + +extern int +onigenc_mb2_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb4_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xff000000) != 0) return 4; + else if ((code & 0xff0000) != 0) return 3; + else if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb2_code_to_mbc_first(OnigCodePoint code) +{ + int first; + + if ((code & 0xff00) != 0) { + first = (code >> 8) & 0xff; + } + else { + return (int )code; + } + return first; +} + +extern int +onigenc_mb4_code_to_mbc_first(OnigCodePoint code) +{ + int first; + + if ((code & 0xff000000) != 0) { + first = (code >> 24) & 0xff; + } + else if ((code & 0xff0000) != 0) { + first = (code >> 16) & 0xff; + } + else if ((code & 0xff00) != 0) { + first = (code >> 8) & 0xff; + } + else { + return (int )code; + } + return first; +} + +extern int +onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) { + *p++ = ((code >> 8) & 0xff); + } + *p++ = (code & 0xff); + +#if 1 + if (enc_len(enc, buf[0]) != (p - buf)) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff000000) != 0) { + *p++ = ((code >> 24) & 0xff); + } + if ((code & 0xff0000) != 0) { + *p++ = ((code >> 16) & 0xff); + } + if ((code & 0xff00) != 0) { + *p++ = ((code >> 8) & 0xff); + } + *p++ = (code & 0xff); + +#if 1 + if (enc_len(enc, buf[0]) != (p - buf)) + return 
ONIGERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + int first = onigenc_mb2_code_to_mbc_first(code); + return (enc_len(enc, first) > 1 ? TRUE : FALSE); + } + + ctype &= ~ONIGENC_CTYPE_WORD; + if (ctype == 0) return FALSE; + } + + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +extern int +onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + int first = onigenc_mb4_code_to_mbc_first(code); + return (enc_len(enc, first) > 1 ? TRUE : FALSE); + } + + ctype &= ~ONIGENC_CTYPE_WORD; + if (ctype == 0) return FALSE; + } + + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +extern int +onigenc_get_all_fold_match_code_ss_0xdf(OnigCodePoint** codes) +{ + static OnigCodePoint list[] = { 0xdf }; + *codes = list; + return 1; +} + +extern int +onigenc_get_fold_match_info_ss_0xdf(UChar* p, UChar* end, + OnigEncFoldMatchInfo** info) +{ + /* German alphabet ess-tsett(U+00DF) */ + static OnigEncFoldMatchInfo ss = { + 3, + { 1, 2, 2 }, + { "\337", "ss", "SS" } /* 0337: 0xdf */ + }; + + if (p >= end) return -1; + + if (*p == 0xdf) { + *info = &ss; + return 1; + } + else if (p + 1 < end) { + if ((*p == 'S' && *(p+1) == 'S') || + (*p == 's' && *(p+1) == 's')) { + *info = &ss; + return 2; + } + } + + return -1; /* is not a fold string. 
*/ +} + +#else /* ONIG_RUBY_M17N */ + +extern int +onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype) +{ + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + return m17n_isalpha(enc, code); + break; + case ONIGENC_CTYPE_BLANK: + return ONIGENC_IS_CODE_BLANK(enc, (int )(code)); + break; + case ONIGENC_CTYPE_CNTRL: + return m17n_iscntrl(enc, code); + break; + case ONIGENC_CTYPE_DIGIT: + return m17n_isdigit(enc, code); + break; + case ONIGENC_CTYPE_GRAPH: + return ONIGENC_IS_CODE_GRAPH(enc, (int )(code)); + break; + case ONIGENC_CTYPE_LOWER: + return m17n_islower(enc, code); + break; + case ONIGENC_CTYPE_PRINT: + return m17n_isprint(enc, code); + break; + case ONIGENC_CTYPE_PUNCT: + return m17n_ispunct(enc, code); + break; + case ONIGENC_CTYPE_SPACE: + return m17n_isspace(enc, code); + break; + case ONIGENC_CTYPE_UPPER: + return m17n_isupper(enc, code); + break; + case ONIGENC_CTYPE_XDIGIT: + return m17n_isxdigit(enc, code); + break; + case ONIGENC_CTYPE_WORD: + return m17n_iswchar(enc, code); + break; + case ONIGENC_CTYPE_ASCII: + return (code < 128 ? 
TRUE : FALSE); + break; + case ONIGENC_CTYPE_ALNUM: + return m17n_isalnum(enc, code); + break; + default: + break; + } + + return 0; +} + +extern int +onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + int c, len; + + m17n_mbcput(enc, code, buf); + c = m17n_firstbyte(enc, code); + len = enc_len(enc, c); + return len; +} + +extern int +onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf) +{ + unsigned int c, low; + + c = m17n_codepoint(enc, p, p + enc_len(enc, *p)); + low = m17n_tolower(enc, c); + m17n_mbcput(enc, low, buf); + + return m17n_codelen(enc, low); +} + +extern int +onigenc_mbc_is_case_ambig(OnigEncoding enc, UChar* p) +{ + unsigned int c = m17n_codepoint(enc, p, p + enc_len(enc, *p)); + + if (m17n_isupper(enc, c) || m17n_islower(enc, c)) + return TRUE; + return FALSE; +} + +extern UChar* +onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s) +{ + UChar *p; + int len; + + if (s <= start) return s; + p = s; + + while (!m17n_islead(enc, *p) && p > start) p--; + while (p + (len = enc_len(enc, *p)) < s) { + p += len; + } + if (p + len == s) return s; + return p; +} + +extern int +onigenc_is_allowed_reverse_match(OnigEncoding enc, UChar* s, UChar* end) +{ + return ONIGENC_IS_SINGLEBYTE(enc); +} + +extern void +onigenc_set_default_caseconv_table(UChar* table) { } + +#endif /* ONIG_RUBY_M17N */ diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h new file mode 100644 index 0000000000..935080a950 --- /dev/null +++ b/ext/mbstring/oniguruma/regenc.h @@ -0,0 +1,97 @@ +/********************************************************************** + + regenc.h - Oniguruma (regular expression library) + + Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp) + +**********************************************************************/ +#ifndef REGENC_H +#define REGENC_H + +#ifndef ONIG_SOURCE_IS_WRAPPED +#include "config.h" +#endif + +#include "oniguruma.h" + +#ifndef NULL +#define NULL ((void* )0) 
+#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +/* error codes */ +/* internal error */ +#define ONIGERR_MEMORY -5 +#define ONIGERR_TYPE_BUG -6 +/* syntax error [-400, -999] */ +#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 +#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 + +#define ONIG_NEWLINE '\n' +#define ONIG_IS_NEWLINE(c) ((c) == ONIG_NEWLINE) +#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) +#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL +#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) + + +#ifdef ONIG_RUBY_M17N + +#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF + +#else /* ONIG_RUBY_M17N */ + +#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII + +/* for encoding system implementation (internal) */ +ONIG_EXTERN int onigenc_nothing_get_all_fold_match_code P_((OnigCodePoint** codes)); +ONIG_EXTERN int onigenc_nothing_get_fold_match_info P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info)); +ONIG_EXTERN int onigenc_nothing_get_ctype_code_range P_((int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])); + +/* methods for single byte encoding */ +ONIG_EXTERN int onigenc_ascii_mbc_to_lower P_((UChar* p, UChar* lower)); +ONIG_EXTERN int onigenc_ascii_mbc_is_case_ambig P_((UChar* p)); +ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((UChar* p, UChar* end)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); +ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((UChar* start, UChar* s)); +ONIG_EXTERN int onigenc_single_byte_is_allowed_reverse_match P_((UChar* s, UChar* end)); + +/* methods for multi byte encoding */ +ONIG_EXTERN OnigCodePoint 
onigenc_mbn_mbc_to_code P_((OnigEncoding enc, UChar* p, UChar* end)); +ONIG_EXTERN int onigenc_mbn_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* lower)); +ONIG_EXTERN int onigenc_mbn_mbc_is_case_ambig P_((UChar* p)); +ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN int onigenc_mb2_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN int onigenc_mb4_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); + +ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes)); +ONIG_EXTERN int onigenc_get_fold_match_info_ss_0xdf P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info)); + +#endif /* is not ONIG_RUBY_M17N */ + + +ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; +ONIG_EXTERN UChar* OnigEncAsciiToLowerCaseTable; +ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[]; + +#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] +#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ + ((OnigEncAsciiCtypeTable[code] & ctype) != 0) +#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ + ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) + +#endif /* REGENC_H */ diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c index a1e86c34f1..5a6c31b82e 100644 --- a/ext/mbstring/oniguruma/regerror.c +++ b/ext/mbstring/oniguruma/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2002-2004 
K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #include "regint.h" @@ -17,118 +17,136 @@ #endif extern char* -regex_error_code_to_format(int code) +onig_error_code_to_format(int code) { char *p; if (code >= 0) return (char* )0; switch (code) { - case REG_MISMATCH: + case ONIG_MISMATCH: p = "mismatch"; break; - case REG_NO_SUPPORT_CONFIG: + case ONIG_NO_SUPPORT_CONFIG: p = "no support in this configuration"; break; - case REGERR_MEMORY: + case ONIGERR_MEMORY: p = "fail to memory allocation"; break; - case REGERR_MATCH_STACK_LIMIT_OVER: + case ONIGERR_MATCH_STACK_LIMIT_OVER: p = "match-stack limit over"; break; - case REGERR_TYPE_BUG: + case ONIGERR_TYPE_BUG: p = "undefined type (bug)"; break; - case REGERR_PARSER_BUG: + case ONIGERR_PARSER_BUG: p = "internal parser error (bug)"; break; - case REGERR_STACK_BUG: + case ONIGERR_STACK_BUG: p = "stack error (bug)"; break; - case REGERR_UNDEFINED_BYTECODE: + case ONIGERR_UNDEFINED_BYTECODE: p = "undefined bytecode (bug)"; break; - case REGERR_UNEXPECTED_BYTECODE: + case ONIGERR_UNEXPECTED_BYTECODE: p = "unexpected bytecode (bug)"; break; - case REGERR_DEFAULT_ENCODING_IS_NOT_SETTED: + case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: p = "default multibyte-encoding is not setted"; break; - case REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: + case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: p = "can't convert to wide-char on specified multibyte-encoding"; break; - case REGERR_END_PATTERN_AT_LEFT_BRACE: + case ONIGERR_INVALID_ARGUMENT: + p = "invalid argument"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACE: p = "end pattern at left brace"; break; - case REGERR_END_PATTERN_AT_LEFT_BRACKET: + case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: p = "end pattern at left bracket"; break; - case REGERR_EMPTY_CHAR_CLASS: + case ONIGERR_EMPTY_CHAR_CLASS: p = "empty char-class"; break; - case REGERR_PREMATURE_END_OF_CHAR_CLASS: + case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: p 
= "premature end of char-class"; break; - case REGERR_END_PATTERN_AT_BACKSLASH: + case ONIGERR_END_PATTERN_AT_BACKSLASH: p = "end pattern at backslash"; break; - case REGERR_END_PATTERN_AT_META: + case ONIGERR_END_PATTERN_AT_META: p = "end pattern at meta"; break; - case REGERR_END_PATTERN_AT_CONTROL: + case ONIGERR_END_PATTERN_AT_CONTROL: p = "end pattern at control"; break; - case REGERR_META_CODE_SYNTAX: + case ONIGERR_META_CODE_SYNTAX: p = "illegal meta-code syntax"; break; - case REGERR_CONTROL_CODE_SYNTAX: + case ONIGERR_CONTROL_CODE_SYNTAX: p = "illegal control-code syntax"; break; - case REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: + case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: p = "char-class value at end of range"; break; - case REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: + case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: p = "char-class value at start of range"; break; - case REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: + case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: p = "unmatched range specifier in char-class"; break; - case REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: p = "target of repeat operator is not specified"; break; - case REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: p = "target of repeat operator is invalid"; break; - case REGERR_NESTED_REPEAT_OPERATOR: + case ONIGERR_NESTED_REPEAT_OPERATOR: p = "nested repeat operator"; break; - case REGERR_UNMATCHED_CLOSE_PARENTHESIS: + case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: p = "unmatched close parenthesis"; break; - case REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: + case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: p = "end pattern with unmatched parenthesis"; break; - case REGERR_END_PATTERN_IN_GROUP: + case ONIGERR_END_PATTERN_IN_GROUP: p = "end pattern in group"; break; - case REGERR_UNDEFINED_GROUP_OPTION: + case ONIGERR_UNDEFINED_GROUP_OPTION: p = "undefined group option"; break; 
- case REGERR_INVALID_POSIX_BRACKET_TYPE: + case ONIGERR_INVALID_POSIX_BRACKET_TYPE: p = "invalid POSIX bracket type"; break; - case REGERR_INVALID_LOOK_BEHIND_PATTERN: + case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: p = "invalid pattern in look-behind"; break; - case REGERR_INVALID_REPEAT_RANGE_PATTERN: + case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: p = "invalid repeat range {lower,upper}"; break; - case REGERR_TOO_BIG_NUMBER: + case ONIGERR_TOO_BIG_NUMBER: p = "too big number"; break; - case REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: + case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: p = "too big number for repeat range"; break; - case REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: + case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: p = "upper is smaller than lower in repeat range"; break; - case REGERR_EMPTY_RANGE_IN_CHAR_CLASS: + case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: p = "empty range in char class"; break; - case REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: + case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: p = "mismatch multibyte code length in char-class range"; break; - case REGERR_TOO_MANY_MULTI_BYTE_RANGES: + case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: p = "too many multibyte code ranges are specified"; break; - case REGERR_TOO_SHORT_MULTI_BYTE_STRING: + case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: p = "too short multibyte code string"; break; - case REGERR_TOO_BIG_BACKREF_NUMBER: + case ONIGERR_TOO_BIG_BACKREF_NUMBER: p = "too big backref number"; break; - case REGERR_INVALID_BACKREF: -#ifdef USE_NAMED_SUBEXP + case ONIGERR_INVALID_BACKREF: +#ifdef USE_NAMED_GROUP p = "invalid backref number/name"; break; #else p = "invalid backref number"; break; #endif - case REGERR_TOO_BIG_WIDE_CHAR_VALUE: + case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: + p = "numbered backref/call is not allowed. 
(use name)"; break; + case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: p = "too big wide-char value"; break; - case REGERR_TOO_LONG_WIDE_CHAR_VALUE: + case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: p = "too long wide-char value"; break; - case REGERR_INVALID_WIDE_CHAR_VALUE: + case ONIGERR_INVALID_WIDE_CHAR_VALUE: p = "invalid wide-char value"; break; - case REGERR_INVALID_SUBEXP_NAME: - p = "invalid subexp name"; break; - case REGERR_UNDEFINED_NAME_REFERENCE: + case ONIGERR_EMPTY_GROUP_NAME: + p = "group name is empty"; break; + case ONIGERR_INVALID_GROUP_NAME: + p = "invalid group name <%n>"; break; + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: +#ifdef USE_NAMED_GROUP + p = "invalid char in group name <%n>"; break; +#else + p = "invalid char in group number <%n>"; break; +#endif + case ONIGERR_UNDEFINED_NAME_REFERENCE: p = "undefined name <%n> reference"; break; - case REGERR_UNDEFINED_GROUP_REFERENCE: - p = "undefined group reference"; break; - case REGERR_MULTIPLEX_DEFINITION_NAME_CALL: + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + p = "undefined group <%n> reference"; break; + case ONIGERR_MULTIPLEX_DEFINED_NAME: + p = "multiplex defined name <%n>"; break; + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: p = "multiplex definition name <%n> call"; break; - case REGERR_NEVER_ENDING_RECURSION: + case ONIGERR_NEVER_ENDING_RECURSION: p = "never ending recursion"; break; - case REGERR_OVER_THREAD_PASS_LIMIT_COUNT: + case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: + p = "group number is too big for capture history"; break; + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + p = "invalid character property name"; break; + case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: p = "over thread pass limit count"; break; default: @@ -139,31 +157,36 @@ regex_error_code_to_format(int code) } -/* for REG_MAX_ERROR_MESSAGE_LEN */ +/* for ONIG_MAX_ERROR_MESSAGE_LEN */ #define MAX_ERROR_PAR_LEN 30 extern int #ifdef HAVE_STDARG_PROTOTYPES -regex_error_code_to_str(UChar* s, int code, ...) 
+onig_error_code_to_str(UChar* s, int code, ...) #else -regex_error_code_to_str(UChar* s, code, va_alist) +onig_error_code_to_str(s, code, va_alist) + UChar* s; int code; va_dcl #endif { UChar *p, *q; - RegErrorInfo* einfo; + OnigErrorInfo* einfo; int len; va_list vargs; va_init_list(vargs, code); switch (code) { - case REGERR_UNDEFINED_NAME_REFERENCE: - case REGERR_MULTIPLEX_DEFINITION_NAME_CALL: - einfo = va_arg(vargs, RegErrorInfo*); + case ONIGERR_UNDEFINED_NAME_REFERENCE: + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + case ONIGERR_MULTIPLEX_DEFINED_NAME: + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + case ONIGERR_INVALID_GROUP_NAME: + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: + einfo = va_arg(vargs, OnigErrorInfo*); len = einfo->par_end - einfo->par; - q = regex_error_code_to_format(code); + q = onig_error_code_to_format(code); p = s; while (*q != '\0') { if (*q == '%') { @@ -194,7 +217,7 @@ regex_error_code_to_str(UChar* s, code, va_alist) break; default: - q = regex_error_code_to_format(code); + q = onig_error_code_to_format(code); len = strlen(q); xmemcpy(s, q, len); s[len] = '\0'; @@ -208,13 +231,13 @@ regex_error_code_to_str(UChar* s, code, va_alist) void #ifdef HAVE_STDARG_PROTOTYPES -regex_snprintf_with_pattern(char buf[], int bufsize, RegCharEncoding enc, +onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...) 
#else -regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) +onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) char buf[]; int bufsize; - RegCharEncoding enc; + OnigEncoding enc; char* pat; char* pat_end; const char *fmt; @@ -222,7 +245,7 @@ regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) #endif { int n, need, len; - char *p, *s; + UChar *p, *s; va_list args; va_init_list(args, fmt); @@ -236,21 +259,22 @@ regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) s = buf + strlen(buf); p = pat; - while (p < pat_end) { - if (*p == '\\') { + while (p < (UChar* )pat_end) { + if (*p == MC_ESC) { *s++ = *p++; - len = mblen(enc, *p); + len = enc_len(enc, *p); while (len-- > 0) *s++ = *p++; } else if (*p == '/') { - *s++ = '\\'; + *s++ = MC_ESC; *s++ = *p++; } - else if (ismb(enc, *p)) { - len = mblen(enc, *p); + else if (ONIGENC_IS_MBC_HEAD(enc, *p)) { + len = enc_len(enc, *p); while (len-- > 0) *s++ = *p++; } - else if (!IS_PRINT(*p) && !IS_SPACE(*p)) { + else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && + !ONIGENC_IS_CODE_SPACE(enc, *p)) { char b[5]; sprintf(b, "\\%03o", *p & 0377); len = strlen(b); diff --git a/ext/mbstring/oniguruma/regex.c b/ext/mbstring/oniguruma/regex.c index 0c4a43be9e..764b3963d9 100644 --- a/ext/mbstring/oniguruma/regex.c +++ b/ext/mbstring/oniguruma/regex.c @@ -2,15 +2,27 @@ regex.c - Oniguruma (regular expression library) - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ /* * Source wrapper for Ruby. 
*/ +#define ONIG_SOURCE_IS_WRAPPED + +#include "regint.h" +#include "regex.h" #include "regparse.c" #include "regcomp.c" #include "regexec.c" +#include "regenc.c" #include "reggnu.c" #include "regerror.c" + +#ifndef ONIG_RUBY_M17N +#include "enc/ascii.c" +#include "enc/utf8.c" +#include "enc/euc_jp.c" +#include "enc/sjis.c" +#endif diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c index b7319ac4fb..c8772ba1f6 100644 --- a/ext/mbstring/oniguruma/regexec.c +++ b/ext/mbstring/oniguruma/regexec.c @@ -2,44 +2,79 @@ regexec.c - Oniguruma (regular expression library) - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #include "regint.h" -static UChar* -get_right_adjust_char_head_with_prev(RegCharEncoding code, - UChar* start, UChar* s, UChar** prev); -static UChar* -step_backward_char(RegCharEncoding code, UChar* start, UChar* s, int n); +static void +region_list_clear(OnigRegion** list) +{ + int i; + if (IS_NOT_NULL(list)) { + for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (IS_NOT_NULL(list[i])) { + xfree(list[i]); + list[i] = (OnigRegion* )0; + } + } + } +} + +static void +region_list_free(OnigRegion* r) +{ + if (IS_NOT_NULL(r->list)) { + region_list_clear(r->list); + xfree(r->list); + r->list = (OnigRegion** )0; + } +} + +static OnigRegion** +region_list_new() +{ + int i; + OnigRegion** list; + + list = (OnigRegion** )xmalloc(sizeof(OnigRegion*) + * (ONIG_MAX_CAPTURE_HISTORY_GROUP + 1)); + CHECK_NULL_RETURN(list); + for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + list[i] = (OnigRegion* )0; + } + + return list; +} extern void -regex_region_clear(RegRegion* region) +onig_region_clear(OnigRegion* region) { int i; for (i = 0; i < region->num_regs; i++) { - region->beg[i] = region->end[i] = REG_REGION_NOTPOS; + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; } + 
region_list_clear(region->list); } extern int -regex_region_resize(RegRegion* region, int n) +onig_region_resize(OnigRegion* region, int n) { int i; region->num_regs = n; - if (n < REG_NREGION) - n = REG_NREGION; + if (n < ONIG_NREGION) + n = ONIG_NREGION; if (region->allocated == 0) { region->beg = (int* )xmalloc(n * sizeof(int)); region->end = (int* )xmalloc(n * sizeof(int)); if (region->beg == 0 || region->end == 0) - return REGERR_MEMORY; + return ONIGERR_MEMORY; region->allocated = n; } @@ -48,38 +83,111 @@ regex_region_resize(RegRegion* region, int n) region->end = (int* )xrealloc(region->end, n * sizeof(int)); if (region->beg == 0 || region->end == 0) - return REGERR_MEMORY; + return ONIGERR_MEMORY; region->allocated = n; } for (i = 0; i < region->num_regs; i++) { - region->beg[i] = region->end[i] = REG_REGION_NOTPOS; + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + + if (IS_NOT_NULL(region->list)) + region_list_clear(region->list); + + return 0; +} + +static int +region_ensure_size(OnigRegion* region, int n) +{ + int i, new_size; + + if (region->allocated >= n) + return 0; + + new_size = region->allocated; + if (new_size == 0) + new_size = ONIG_NREGION; + while (new_size < n) + new_size *= 2; + + if (region->allocated == 0) { + region->beg = (int* )xmalloc(new_size * sizeof(int)); + region->end = (int* )xmalloc(new_size * sizeof(int)); + if (region->beg == 0 || region->end == 0) + return ONIGERR_MEMORY; + + region->allocated = new_size; + } + else if (region->allocated < new_size) { + region->beg = (int* )xrealloc(region->beg, new_size * sizeof(int)); + region->end = (int* )xrealloc(region->end, new_size * sizeof(int)); + if (region->beg == 0 || region->end == 0) + return ONIGERR_MEMORY; + + region->allocated = new_size; + } + + for (i = region->num_regs; i < n; i++) { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; } return 0; } +static int +region_list_add_entry(OnigRegion* region, int group, int start, int end) +{ + int r, pos; + 
OnigRegion** list; + + if (group > ONIG_MAX_CAPTURE_HISTORY_GROUP) + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + + if (IS_NULL(region->list)) { + region->list = region_list_new(); + CHECK_NULL_RETURN_VAL(region->list, ONIGERR_MEMORY); + } + + list = region->list; + if (IS_NULL(list[group])) { + list[group] = onig_region_new(); + CHECK_NULL_RETURN_VAL(list[group], ONIGERR_MEMORY); + } + + r = region_ensure_size(list[group], list[group]->num_regs + 1); + if (r != 0) return r; + + pos = list[group]->num_regs; + list[group]->beg[pos] = start; + list[group]->end[pos] = end; + list[group]->num_regs++; + + return 0; +} + static void -regex_region_init(RegRegion* region) +onig_region_init(OnigRegion* region) { region->num_regs = 0; region->allocated = 0; region->beg = (int* )0; region->end = (int* )0; + region->list = (OnigRegion** )0; } -extern RegRegion* -regex_region_new() +extern OnigRegion* +onig_region_new() { - RegRegion* r; + OnigRegion* r; - r = (RegRegion* )xmalloc(sizeof(RegRegion)); - regex_region_init(r); + r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); + onig_region_init(r); return r; } extern void -regex_region_free(RegRegion* r, int free_self) +onig_region_free(OnigRegion* r, int free_self) { if (r) { if (r->allocated > 0) { @@ -87,12 +195,13 @@ regex_region_free(RegRegion* r, int free_self) if (r->end) xfree(r->end); r->allocated = 0; } + region_list_free(r); if (free_self) xfree(r); } } extern void -regex_region_copy(RegRegion* to, RegRegion* from) +onig_region_copy(OnigRegion* to, OnigRegion* from) { #define RREGC_SIZE (sizeof(int) * from->num_regs) int i; @@ -117,6 +226,29 @@ regex_region_copy(RegRegion* to, RegRegion* from) to->end[i] = from->end[i]; } to->num_regs = from->num_regs; + + if (IS_NOT_NULL(from->list)) { + if (IS_NULL(to->list)) { + to->list = region_list_new(); + } + + for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (IS_NOT_NULL(from->list[i])) { + if (IS_NULL(to->list[i])) + to->list[i] = onig_region_new(); + + 
onig_region_copy(to->list[i], from->list[i]); + } + else { + if (IS_NOT_NULL(to->list[i])) { + xfree(to->list[i]); + to->list[i] = (OnigRegion* )0; + } + } + } + } + else + region_list_free(to); } @@ -173,13 +305,14 @@ typedef struct _StackType { #define STK_MEM_END 0x0300 #define STK_REPEAT_INC 0x0400 /* avoided by normal-POP */ -#define STK_POS 0x0500 /* used when POP-POS */ -#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ +#define STK_POS 0x0500 /* used when POP-POS */ +#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ #define STK_REPEAT 0x0700 #define STK_CALL_FRAME 0x0800 #define STK_RETURN 0x0900 #define STK_MEM_END_MARK 0x0a00 -#define STK_VOID 0x0b00 /* for fill a blank */ +#define STK_VOID 0x0b00 /* for fill a blank */ +#define STK_NULL_CHECK_END 0x0c00 /* for recursive call */ /* stack type check mask */ #define STK_MASK_POP_USED 0x00ff @@ -189,8 +322,8 @@ typedef struct _StackType { typedef struct { void* stack_p; int stack_n; - RegOptionType options; - RegRegion* region; + OnigOptionType options; + OnigRegion* region; UChar* start; /* search start position (for \G: BEGIN_POSITION) */ } MatchArg; @@ -245,18 +378,18 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, x = (StackType* )xmalloc(sizeof(StackType) * n * 2); if (IS_NULL(x)) { STACK_SAVE; - return REGERR_MEMORY; + return ONIGERR_MEMORY; } xmemcpy(x, stk_base, n * sizeof(StackType)); n *= 2; } else { n *= 2; - if (n > MATCH_STACK_LIMIT_SIZE) return REGERR_MATCH_STACK_LIMIT_OVER; + if (n > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER; x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n); if (IS_NULL(x)) { STACK_SAVE; - return REGERR_MEMORY; + return ONIGERR_MEMORY; } } *arg_stk = x + (stk - stk_base); @@ -365,6 +498,24 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, }\ } while (0) +#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ + int level = 0;\ + while (k < stk) {\ + if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + 
if (level == 0) (start) = k->u.mem.pstr;\ + level++;\ + }\ + else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ + level--;\ + if (level == 0) {\ + (end) = k->u.mem.pstr;\ + break;\ + }\ + }\ + k++;\ + }\ +} while (0) + #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_NULL_CHECK_START;\ @@ -373,6 +524,13 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, STACK_INC;\ } while(0) +#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_END;\ + stk->u.null_check.num = (cnum);\ + STACK_INC;\ +} while(0) + #define STACK_PUSH_CALL_FRAME(pat) do {\ STACK_ENSURE(1);\ stk->type = STK_CALL_FRAME;\ @@ -387,7 +545,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } while(0) -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG #define STACK_BASE_CHECK(p) \ if ((p) < stk_base) goto stack_error; #else @@ -522,6 +680,114 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, }\ } while(0) +#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ + int level = 0;\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + else level--;\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + level++;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + 
if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ + int level = 0;\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + else {\ + level--;\ + }\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + if (k->u.null_check.num == (id)) level++;\ + }\ + }\ +} while(0) + #define STACK_GET_REPEAT(id, k) do {\ int level = 0;\ k = stk;\ @@ -559,68 +825,63 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } while(0) -#define CASETABLE_TOLOWER(c) (casetable[c]) - -/* byte_code is already converted to lower-case at string compile time */ -#define SBTRANSCMP(byte_code,c) (byte_code == CASETABLE_TOLOWER(c)) - #define STRING_CMP(s1,s2,len) do {\ - if (ignore_case) {\ - int slen; \ - while (len > 0) {\ - slen = mblen(encode, *s1); \ - if (slen == 1) {\ - if (CASETABLE_TOLOWER(*s1) != CASETABLE_TOLOWER(*s2)) \ - goto fail;\ - s1++; s2++; len--; \ - } \ - else {\ - len -= slen; \ - while (slen-- > 0) { \ - if (*s1++ != *s2++) goto fail;\ - } \ - }\ - }\ - }\ - else {\ - while (len-- > 0) {\ - if (*s1++ != *s2++) goto fail;\ - }\ + 
while (len-- > 0) {\ + if (*s1++ != *s2++) goto fail;\ }\ } while(0) +#define STRING_CMP_IC(s1,ps2,len) do {\ + if (string_cmp_ic(encode, s1, ps2, len) == 0) \ + goto fail; \ +} while(0) + +static int string_cmp_ic(OnigEncoding enc, + UChar* s1, UChar** ps2, int mblen) +{ + UChar buf1[ONIGENC_MBC_TO_LOWER_MAXLEN]; + UChar buf2[ONIGENC_MBC_TO_LOWER_MAXLEN]; + UChar *p1, *p2, *end, *s2; + int len1, len2; + + s2 = *ps2; + end = s1 + mblen; + while (s1 < end) { + len1 = ONIGENC_MBC_TO_LOWER(enc, s1, buf1); + len2 = ONIGENC_MBC_TO_LOWER(enc, s2, buf2); + if (len1 != len2) return 0; + p1 = buf1; + p2 = buf2; + while (len1-- > 0) { + if (*p1 != *p2) return 0; + p1++; + p2++; + } + + s1 += enc_len(enc, *s1); + s2 += enc_len(enc, *s2); + } + + *ps2 = s2; + return 1; +} + #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ is_fail = 0;\ - if (ignore_case) {\ - int slen; \ - while (len > 0) {\ - slen = mblen(encode, *s1); \ - if (slen == 1) {\ - if (CASETABLE_TOLOWER(*s1) != CASETABLE_TOLOWER(*s2)) {\ - is_fail = 1; break;\ - }\ - s1++; s2++; len--; \ - } \ - else {\ - len -= slen; \ - while (slen-- > 0) { \ - if (*s1++ != *s2++) {\ - is_fail = 1; break;\ - }\ - } \ - if (is_fail != 0) break;\ - }\ - }\ - }\ - else {\ - while (len-- > 0) {\ - if (*s1++ != *s2++) {\ - is_fail = 1; break;\ - }\ + while (len-- > 0) {\ + if (*s1++ != *s2++) {\ + is_fail = 1; break;\ }\ }\ } while(0) +#define STRING_CMP_VALUE_IC(s1,ps2,len,is_fail) do {\ + if (string_cmp_ic(encode, s1, ps2, len) == 0) \ + is_fail = 1; \ + else \ + is_fail = 0; \ +} while(0) + #define ON_STR_BEGIN(s) ((s) == str) #define ON_STR_END(s) ((s) == end) #define IS_EMPTY_STR (str == end) @@ -630,7 +891,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define DATA_ENSURE_CHECK(n) (s + (n) <= end) -#ifdef REG_DEBUG_STATISTICS +#ifdef ONIG_DEBUG_STATISTICS #define USE_TIMEOFDAY @@ -673,7 +934,15 @@ static int MaxStackDepth = 0; OpTime[OpCurr] += TIMEDIFF(te, ts);\ } while (0) -extern void 
regex_statistics_init() +#ifdef RUBY_PLATFORM +static VALUE onig_stat_print() +{ + onig_print_statistics(stderr); + return Qnil; +} +#endif + +extern void onig_statistics_init() { int i; for (i = 0; i < 256; i++) { @@ -682,26 +951,18 @@ extern void regex_statistics_init() MaxStackDepth = 0; #ifdef RUBY_PLATFORM - rb_define_global_function("regex_stat_print", regex_stat_print, 0); + ONIG_RUBY_DEFINE_GLOBAL_FUNCTION("onig_stat_print", onig_stat_print, 0); #endif } -#ifdef RUBY_PLATFORM -static VALUE regex_stat_print() -{ - regex_print_statistics(stderr); - return Qnil; -} -#endif - extern void -regex_print_statistics(FILE* f) +onig_print_statistics(FILE* f) { int i; fprintf(f, " count prev time\n"); - for (i = 0; RegOpInfo[i].opcode >= 0; i++) { + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], RegOpInfo[i].name); + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); } fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); } @@ -720,24 +981,24 @@ regex_print_statistics(FILE* f) #endif extern int -regex_is_in_wc_range(UChar* p, WCINT wc) +onig_is_in_code_range(UChar* p, OnigCodePoint code) { - WCINT n, *data; + OnigCodePoint n, *data; int low, high, x; - GET_WCINT(n, p); - data = (WCINT* )p; + GET_CODE_POINT(n, p); + data = (OnigCodePoint* )p; data++; for (low = 0, high = n; low < high; ) { x = (low + high) >> 1; - if (wc > data[x * 2 + 1]) + if (code > data[x * 2 + 1]) low = x + 1; else high = x; } - return ((low < n && wc >= data[low * 2]) ? 1 : 0); + return ((low < n && code >= data[low * 2]) ? 
1 : 0); } @@ -757,13 +1018,12 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, { static UChar FinishCode[] = { OP_FINISH }; - int i, n, num_mem, best_len, pop_level, find_cond; + int i, n, num_mem, best_len, pop_level; LengthType tlen, tlen2; MemNumType mem; RelAddrType addr; - RegOptionType option = reg->options; - RegCharEncoding encode = reg->enc; - unsigned char* casetable = DefaultTransTable; + OnigOptionType option = reg->options; + OnigEncoding encode = reg->enc; int ignore_case; UChar *s, *q, *sbegin; UChar *p = reg->p; @@ -776,7 +1036,6 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); ignore_case = IS_IGNORECASE(option); - find_cond = IS_FIND_CONDITION(option); pop_level = reg->stack_pop_level; num_mem = reg->num_mem; repeat_stk = (StackIndex* )alloca_base; @@ -791,7 +1050,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; } -#ifdef REG_DEBUG_MATCH +#ifdef ONIG_DEBUG_MATCH fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", (int )str, (int )end, (int )sstart, (int )sprev); fprintf(stderr, "size: %d, start offset: %d\n", @@ -799,17 +1058,17 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, #endif STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ - best_len = REG_MISMATCH; + best_len = ONIG_MISMATCH; s = sstart; while (1) { -#ifdef REG_DEBUG_MATCH +#ifdef ONIG_DEBUG_MATCH { UChar *q, *bp, buf[50]; int len; fprintf(stderr, "%4d> \"", (int )(s - str)); bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { - len = mblen(encode, *q); + len = enc_len(encode, *q); while (len-- > 0) *bp++ = *q++; } if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } @@ -817,7 +1076,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, *bp = 0; fputs(buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - regex_print_compiled_byte_code(stderr, p, NULL); + 
onig_print_compiled_byte_code(stderr, p, NULL); fprintf(stderr, "\n"); } #endif @@ -827,9 +1086,10 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_END: STAT_OP_IN(OP_END); n = s - sstart; if (n > best_len) { - RegRegion* region = msa->region; + OnigRegion* region = msa->region; best_len = n; if (region) { +#ifdef USE_POSIX_REGION_OPTION if (IS_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; @@ -837,46 +1097,71 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, rmt[0].rm_eo = s - str; for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->backtrack_mem, i)) + if (BIT_STATUS_AT(reg->bt_mem_start, i)) rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; else rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; - rmt[i].rm_eo = (find_cond != 0 + rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) ? STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )mem_end_stk[i])) - str; } else { - rmt[i].rm_so = rmt[i].rm_eo = REG_REGION_NOTPOS; + rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; } } } else { +#endif /* USE_POSIX_REGION_OPTION */ region->beg[0] = sstart - str; region->end[0] = s - str; for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->backtrack_mem, i)) + if (BIT_STATUS_AT(reg->bt_mem_start, i)) region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; else region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; - region->end[i] = (find_cond != 0 + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )mem_end_stk[i])) - str; } else { - region->beg[i] = region->end[i] = REG_REGION_NOTPOS; + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; } } - } - } - } + + if (reg->capture_history != 0) { + UChar *pstart, *pend; + for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(reg->capture_history, i) != 0) { + stkp = stk_base; + do { + STACK_GET_MEM_RANGE(stkp, i, pstart, pend); + if (stkp < stk) { + int r; + r = region_list_add_entry(region, i, + pstart - str, pend - str); + if (r) { + STACK_SAVE; + return r; + } + } + stkp++; + } while (stkp < stk); + } + } + } /* list of captures */ +#ifdef USE_POSIX_REGION_OPTION + } /* else IS_POSIX_REGION() */ +#endif + } /* if (region) */ + } /* n > best_len */ STAT_OP_OUT; - if (find_cond) { + if (IS_FIND_CONDITION(option)) { if (IS_FIND_NOT_EMPTY(option) && s == sstart) { - best_len = REG_MISMATCH; + best_len = ONIG_MISMATCH; goto fail; /* for retry */ } if (IS_FIND_LONGEST(option) && s < end) { @@ -902,9 +1187,19 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, break; case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); - if (! SBTRANSCMP(*p, *s)) goto fail; - DATA_ENSURE(1); - p++; s++; + { + int len; + UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + + len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf); + DATA_ENSURE(len); + q = lowbuf; + s += enc_len(encode, *s); + while (len-- > 0) { + if (*p != *q) goto fail; + p++; q++; + } + } STAT_OP_OUT; break; @@ -976,13 +1271,26 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, break; case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen); - while (tlen-- > 0) { - if (! 
SBTRANSCMP(*p, *s)) goto fail; - p++; s++; + { + int len; + UChar *q, *endp, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + + GET_LENGTH_INC(tlen, p); + endp = p + tlen; + + while (p < endp) { + len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf); + DATA_ENSURE(len); + sprev = s; + s += enc_len(encode, *s); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) goto fail; + p++; q++; + } + } } - sprev = s - 1; + STAT_OP_OUT; continue; break; @@ -1078,31 +1386,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; - s += mblen(encode, *s); /* OP_CCLASS can match mb-code. \D, \S */ + s += enc_len(encode, *s); /* OP_CCLASS can match mb-code. \D, \S */ STAT_OP_OUT; break; case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB); - if (! ismb(encode, *s)) goto fail; + if (! ONIGENC_IS_MBC_HEAD(encode, *s)) goto fail; cclass_mb: GET_LENGTH_INC(tlen, p); { - WCINT wc; + OnigCodePoint code; UChar *ss; - int mb_len = mblen(encode, *s); + int mb_len = enc_len(encode, *s); DATA_ENSURE(mb_len); ss = s; s += mb_len; - wc = MB2WC(ss, s, encode); + code = ONIGENC_MBC_TO_CODE(encode, ss, s); -#ifdef UNALIGNED_WORD_ACCESS - if (! regex_is_in_wc_range(p, wc)) goto fail; +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (! onig_is_in_code_range(p, code)) goto fail; #else q = p; ALIGNMENT_RIGHT(q); - if (! regex_is_in_wc_range(q, wc)) goto fail; + if (! 
onig_is_in_code_range(q, code)) goto fail; #endif } p += tlen; @@ -1111,7 +1419,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); - if (ismb(encode, *s)) { + if (ONIGENC_IS_MBC_HEAD(encode, *s)) { p += SIZE_BITSET; goto cclass_mb; } @@ -1131,12 +1439,12 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; - s += mblen(encode, *s); + s += enc_len(encode, *s); STAT_OP_OUT; break; case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT); - if (! ismb(encode, *s)) { + if (! ONIGENC_IS_MBC_HEAD(encode, *s)) { DATA_ENSURE(1); s++; GET_LENGTH_INC(tlen, p); @@ -1147,9 +1455,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, cclass_mb_not: GET_LENGTH_INC(tlen, p); { - WCINT wc; + OnigCodePoint code; UChar *ss; - int mb_len = mblen(encode, *s); + int mb_len = enc_len(encode, *s); if (s + mb_len > end) { s = end; @@ -1159,14 +1467,14 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, ss = s; s += mb_len; - wc = MB2WC(ss, s, encode); + code = ONIGENC_MBC_TO_CODE(encode, ss, s); -#ifdef UNALIGNED_WORD_ACCESS - if (regex_is_in_wc_range(p, wc)) goto fail; +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (onig_is_in_code_range(p, code)) goto fail; #else q = p; ALIGNMENT_RIGHT(q); - if (regex_is_in_wc_range(q, wc)) goto fail; + if (onig_is_in_code_range(q, code)) goto fail; #endif } p += tlen; @@ -1177,7 +1485,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); - if (ismb(encode, *s)) { + if (ONIGENC_IS_MBC_HEAD(encode, *s)) { p += SIZE_BITSET; goto cclass_mb_not; } @@ -1194,51 +1502,56 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, break; case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); - DATA_ENSURE(1); - if (ismb(encode, *s)) { - n = mblen(encode, *s); + n = enc_len(encode, *s); + if (n > 
1) { DATA_ENSURE(n); s += n; } else { - if (! IS_MULTILINE(option)) { - if (IS_NEWLINE(*s)) goto fail; - } + DATA_ENSURE(1); + if (ONIG_IS_NEWLINE(*s)) goto fail; s++; } STAT_OP_OUT; break; + case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML); + n = enc_len(encode, *s); + DATA_ENSURE(n); + s += n; + STAT_OP_OUT; + break; + case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR); - if (! IS_MULTILINE(option)) { - while (s < end) { - STACK_PUSH_ALT(p, s, sprev); - if (ismb(encode, *s)) { - n = mblen(encode, *s); - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - if (IS_NEWLINE(*s)) goto fail; - sprev = s; - s++; - } + while (s < end) { + STACK_PUSH_ALT(p, s, sprev); + n = enc_len(encode, *s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + if (ONIG_IS_NEWLINE(*s)) goto fail; + sprev = s; + s++; } } - else { - while (s < end) { - STACK_PUSH_ALT(p, s, sprev); - if (ismb(encode, *s)) { - n = mblen(encode, *s); - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } + STAT_OP_OUT; + break; + + case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR); + while (s < end) { + STACK_PUSH_ALT(p, s, sprev); + n = enc_len(encode, *s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; } } STAT_OP_OUT; @@ -1249,16 +1562,14 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - if (ismb(encode, *s)) { - n = mblen(encode, *s); + n = enc_len(encode, *s); + if (n > 1) { DATA_ENSURE(n); sprev = s; s += n; } else { - if (! IS_MULTILINE(option)) { - if (IS_NEWLINE(*s)) goto fail; - } + if (ONIG_IS_NEWLINE(*s)) goto fail; sprev = s; s++; } @@ -1267,53 +1578,57 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STAT_OP_OUT; break; - case OP_WORD: STAT_OP_IN(OP_WORD); - DATA_ENSURE(1); - if (! 
IS_WORD_STR_INC(encode, s, end)) - goto fail; + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + while (s < end) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enc_len(encode, *s); + if (n >1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + p++; STAT_OP_OUT; break; - case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD); + case OP_WORD: STAT_OP_IN(OP_WORD); DATA_ENSURE(1); - if (IS_WORD_STR_INC(encode, s, end)) + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - STAT_OP_OUT; - break; -#ifdef USE_SBMB_CLASS - case OP_WORD_SB: STAT_OP_IN(OP_WORD_SB); - DATA_ENSURE(1); - if (! IS_SB_WORD(encode, *s)) - goto fail; - s++; + s += enc_len(encode, *s); STAT_OP_OUT; break; - case OP_WORD_MB: STAT_OP_IN(OP_WORD_MB); + case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD); DATA_ENSURE(1); - if (! IS_MB_WORD(encode, *s)) + if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - n = mblen(encode, *s); - DATA_ENSURE(n); - s += n; + s += enc_len(encode, *s); STAT_OP_OUT; break; -#endif case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND); if (ON_STR_BEGIN(s)) { DATA_ENSURE(1); - if (! IS_WORD_STR(encode, s, end)) + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; } else if (ON_STR_END(s)) { - if (! IS_WORD_STR(encode, sprev, end)) + if (! 
ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } else { - if (IS_WORD_STR(encode, s, end) == IS_WORD_STR(encode, sprev, end)) + if (ONIGENC_IS_MBC_WORD(encode, s, end) + == ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } STAT_OP_OUT; @@ -1322,15 +1637,16 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND); if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK(1) && IS_WORD_STR(encode, s, end)) + if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; } else if (ON_STR_END(s)) { - if (IS_WORD_STR(encode, sprev, end)) + if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } else { - if (IS_WORD_STR(encode, s, end) != IS_WORD_STR(encode, sprev, end)) + if (ONIGENC_IS_MBC_WORD(encode, s, end) + != ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } STAT_OP_OUT; @@ -1339,8 +1655,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, #ifdef USE_WORD_BEGIN_END case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN); - if (DATA_ENSURE_CHECK(1) && IS_WORD_STR(encode, s, end)) { - if (ON_STR_BEGIN(s) || !IS_WORD_STR(encode, sprev, end)) { + if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) { + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { STAT_OP_OUT; continue; } @@ -1349,8 +1665,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, break; case OP_WORD_END: STAT_OP_IN(OP_WORD_END); - if (!ON_STR_BEGIN(s) && IS_WORD_STR(encode, sprev, end)) { - if (ON_STR_END(s) || !IS_WORD_STR(encode, s, end)) { + if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { STAT_OP_OUT; continue; } @@ -1379,7 +1695,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STAT_OP_OUT; continue; } - else if (IS_NEWLINE(*sprev) && !ON_STR_END(s)) { + else if (ONIG_IS_NEWLINE(*sprev) && !ON_STR_END(s)) { STAT_OP_OUT; continue; } @@ -1389,7 +1705,7 @@ match_at(regex_t* reg, 
UChar* str, UChar* end, UChar* sstart, case OP_END_LINE: STAT_OP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !IS_NEWLINE(*sprev)) { + if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) { #endif if (IS_NOTEOL(msa->options)) goto fail; STAT_OP_OUT; @@ -1398,7 +1714,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, } #endif } - else if (IS_NEWLINE(*s)) { + else if (ONIG_IS_NEWLINE(*s)) { STAT_OP_OUT; continue; } @@ -1408,7 +1724,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !IS_NEWLINE(*sprev)) { + if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) { #endif if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */ STAT_OP_OUT; @@ -1417,7 +1733,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, } #endif } - if (IS_NEWLINE(*s) && ON_STR_END(s+1)) { + if (ONIG_IS_NEWLINE(*s) && ON_STR_END(s+1)) { STAT_OP_OUT; continue; } @@ -1463,9 +1779,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, #ifdef USE_SUBEXP_CALL case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_END(mem, s); STACK_GET_MEM_START(mem, stkp); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - STACK_PUSH_MEM_END(mem, s); STAT_OP_OUT; continue; break; @@ -1509,19 +1825,51 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (BIT_STATUS_AT(reg->backtrack_mem, mem)) + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]); - pend = (find_cond != 0 + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); n = pend - pstart; DATA_ENSURE(n); sprev = s; STRING_CMP(pstart, s, n); - while (sprev + (len = mblen(encode, *sprev)) < s) + while (sprev + (len = enc_len(encode, *sprev)) < s) + sprev += len; + + STAT_OP_OUT; + continue; + } + break; + + case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC); + GET_MEMNUM_INC(mem, p); + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(pstart, &s, n); + while (sprev + (len = enc_len(encode, *sprev)) < s) sprev += len; STAT_OP_OUT; @@ -1541,12 +1889,12 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - if (BIT_STATUS_AT(reg->backtrack_mem, mem)) + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]); - pend = (find_cond != 0 + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); n = pend - pstart; @@ -1556,7 +1904,46 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STRING_CMP_VALUE(pstart, swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = mblen(encode, *sprev)) < s) + while (sprev + (len = enc_len(encode, *sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + STAT_OP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(pstart, &swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enc_len(encode, *sprev)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); @@ -1598,10 +1985,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_NULL_CHECK(isnull, mem, s); if (isnull) { -#ifdef REG_DEBUG_MATCH +#ifdef ONIG_DEBUG_MATCH fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", (int )mem, (int )s); #endif + null_check_found: /* empty loop founded, skip next instruction */ switch (*p++) { case OP_JUMP: @@ -1622,6 +2010,56 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, continue; break; +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + } + STAT_OP_OUT; + continue; + break; +#endif + +#ifdef USE_SUBEXP_CALL + case OP_NULL_CHECK_END_MEMST_PUSH: + STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); +#else + STACK_NULL_CHECK_REC(isnull, mem, s); +#endif + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + else { + STACK_PUSH_NULL_CHECK_END(mem); + } + } + STAT_OP_OUT; + continue; + break; +#endif + case OP_JUMP: 
STAT_OP_IN(OP_JUMP); GET_RELADDR_INC(addr, p); p += addr; @@ -1757,16 +2195,21 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, stkp = STACK_AT(si); #endif stkp->u.repeat.count++; - if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - /* end of repeat. Nothing to do. */ + if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_ALT(pcode, s, sprev); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } } - else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(stkp->u.repeat.pcode, s, sprev); + else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + STACK_PUSH_REPEAT_INC(si); } - else { - p = stkp->u.repeat.pcode; - } - STACK_PUSH_REPEAT_INC(si); } STAT_OP_OUT; continue; @@ -1814,9 +2257,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); - s = MBBACK(encode, str, s, (int )tlen); + s = ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; - sprev = regex_get_prev_char_head(encode, str, s); + sprev = onigenc_get_prev_char_head(encode, str, s); STAT_OP_OUT; continue; break; @@ -1824,7 +2267,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); - q = MBBACK(encode, str, s, (int )tlen); + q = ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(q)) { /* too short case -> success. ex. /(? 
0xfe - 0xa1) -#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) -#define sjis_ismbfirst(c) ismb(REGCODE_SJIS, (c)) -#define sjis_ismbtrail(c) SJIS_FOLLOW_TABLE[(c)] - -extern WCINT -regex_mb2wc(UChar* p, UChar* end, RegCharEncoding code) -{ - int c, i, len; - WCINT n; - - if (code == REGCODE_UTF8) { - c = *p++; - len = mblen(code,c); - if (len > 1) { - len--; - n = c & ((1 << (6 - len)) - 1); - while (len--) { - c = *p++; - n = (n << 6) | (c & ((1 << 6) - 1)); - } - } - else - n = c; - } - else { - c = *p++; - len = mblen(code,c); - n = c; - if (len == 1) return n; - - for (i = 1; i < len; i++) { - if (p >= end) break; - c = *p++; - n <<= 8; n += c; - } - } - return n; -} -#endif /* REG_RUBY_M17N */ - -extern UChar* -regex_get_left_adjust_char_head(RegCharEncoding code, UChar* start, UChar* s) -{ - UChar *p; - int len; - - if (s <= start) return s; - p = s; - -#ifdef REG_RUBY_M17N - while (!m17n_islead(code, *p) && p > start) p--; - while (p + (len = mblen(code, *p)) < s) { - p += len; - } - if (p + len == s) return s; - return p; -#else - - if (code == REGCODE_ASCII) { - return p; - } - else if (code == REGCODE_EUCJP) { - while (!eucjp_islead(*p) && p > start) p--; - len = mblen(code, *p); - if (p + len > s) return p; - p += len; - return p + ((s - p) & ~1); - } - else if (code == REGCODE_SJIS) { - if (sjis_ismbtrail(*p)) { - while (p > start) { - if (! 
sjis_ismbfirst(*--p)) { - p++; - break; - } - } - } - len = mblen(code, *p); - if (p + len > s) return p; - p += len; - return p + ((s - p) & ~1); - } - else { /* REGCODE_UTF8 */ - while (!utf8_islead(*p) && p > start) p--; - return p; - } -#endif /* REG_RUBY_M17N */ + return ONIGERR_UNEXPECTED_BYTECODE; } -extern UChar* -regex_get_right_adjust_char_head(RegCharEncoding code, UChar* start, UChar* s) -{ - UChar* p = regex_get_left_adjust_char_head(code, start, s); - - if (p < s) { - p += mblen(code, *p); - } - return p; -} static UChar* -get_right_adjust_char_head_with_prev(RegCharEncoding code, - UChar* start, UChar* s, UChar** prev) -{ - UChar* p = regex_get_left_adjust_char_head(code, start, s); - - if (p < s) { - if (prev) *prev = p; - p += mblen(code, *p); - } - else { - if (prev) *prev = (UChar* )NULL; /* Sorry */ - } - return p; -} - -extern UChar* -regex_get_prev_char_head(RegCharEncoding code, UChar* start, UChar* s) -{ - if (s <= start) - return (UChar* )NULL; - - return regex_get_left_adjust_char_head(code, start, s - 1); -} - -static UChar* -step_backward_char(RegCharEncoding code, UChar* start, UChar* s, int n) -{ - while (IS_NOT_NULL(s) && n-- > 0) { - if (s <= start) - return (UChar* )NULL; - - s = regex_get_left_adjust_char_head(code, start, s - 1); - } - return s; -} - -static UChar* -slow_search(RegCharEncoding code, UChar* target, UChar* target_end, +slow_search(OnigEncoding enc, UChar* target, UChar* target_end, UChar* text, UChar* text_end, UChar* text_range) { UChar *t, *p, *s, *end; @@ -2089,14 +2372,15 @@ slow_search(RegCharEncoding code, UChar* target, UChar* target_end, if (t == target_end) return s; } - s += mblen(code, *s); + s += enc_len(enc, *s); } return (UChar* )NULL; } +#if 0 static int -str_trans_match_after_head_byte(RegCharEncoding code, +str_trans_match_after_head_byte(OnigEncoding enc, int len, UChar* t, UChar* tend, UChar* p) { while (--len > 0) { @@ -2105,23 +2389,19 @@ str_trans_match_after_head_byte(RegCharEncoding code, } 
if (len == 0) { + int lowlen; + UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + while (t < tend) { - len = mblen(code, *p); - if (len == 1) { - if (*t != TOLOWER(code, *p)) - break; - p++; - t++; - } - else { - if (*t != *p++) break; - t++; - while (--len > 0) { - if (*t != *p) break; - t++; p++; - } - if (len > 0) break; - } + len = enc_len(enc, *p); + lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf); + q = lowbuf; + while (lowlen > 0) { + if (*t++ != *q++) break; + lowlen--; + } + if (lowlen > 0) break; + p += len; } if (t == tend) return 1; @@ -2129,14 +2409,36 @@ str_trans_match_after_head_byte(RegCharEncoding code, return 0; } +#endif + +static int +str_lower_case_match(OnigEncoding enc, UChar* t, UChar* tend, UChar* p) +{ + int len, lowlen; + UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + + while (t < tend) { + len = enc_len(enc, *p); + lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf); + q = lowbuf; + while (lowlen > 0) { + if (*t++ != *q++) return 0; + lowlen--; + } + p += len; + } + + return 1; +} static UChar* -slow_search_ic(RegCharEncoding code, +slow_search_ic(OnigEncoding enc, UChar* target, UChar* target_end, UChar* text, UChar* text_end, UChar* text_range) { - int len; + int len, lowlen; UChar *t, *p, *s, *end; + UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; end = text_end - (target_end - target) + 1; if (end > text_range) @@ -2145,13 +2447,21 @@ slow_search_ic(RegCharEncoding code, s = text; while (s < end) { - len = mblen(code, *s); - if (*s == *target || (len == 1 && TOLOWER(code, *s) == *target)) { - p = s + 1; + len = enc_len(enc, *s); + lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf); + if (*target == *lowbuf) { + p = lowbuf + 1; t = target + 1; - if (str_trans_match_after_head_byte(code, len, t, target_end, p)) - return s; + while (--lowlen > 0) { + if (*p != *t) break; + p++; *t++; + } + if (lowlen == 0) { + if (str_lower_case_match(enc, t, target_end, s + len)) + return s; + } } + s += len; } @@ -2159,7 +2469,7 @@ slow_search_ic(RegCharEncoding 
code, } static UChar* -slow_search_backward(RegCharEncoding code, UChar* target, UChar* target_end, +slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start) { UChar *t, *p, *s; @@ -2168,7 +2478,7 @@ slow_search_backward(RegCharEncoding code, UChar* target, UChar* target_end, if (s > text_start) s = text_start; else - s = regex_get_left_adjust_char_head(code, adjust_text, s); + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); while (s >= text) { if (*s == *target) { @@ -2182,36 +2492,45 @@ slow_search_backward(RegCharEncoding code, UChar* target, UChar* target_end, if (t == target_end) return s; } - s = regex_get_prev_char_head(code, adjust_text, s); + s = onigenc_get_prev_char_head(enc, adjust_text, s); } return (UChar* )NULL; } static UChar* -slow_search_backward_ic(RegCharEncoding code, +slow_search_backward_ic(OnigEncoding enc, UChar* target,UChar* target_end, UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start) { - int len; + int len, lowlen; UChar *t, *p, *s; + UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; s = text_end - (target_end - target); if (s > text_start) s = text_start; else - s = regex_get_left_adjust_char_head(code, adjust_text, s); + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); while (s >= text) { - len = mblen(code, *s); - if (*s == *target || (len == 1 && TOLOWER(code, *s) == *target)) { - p = s + 1; + len = enc_len(enc, *s); + lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf); + if (*target == *lowbuf) { + p = lowbuf + 1; t = target + 1; - if (str_trans_match_after_head_byte(code, len, t, target_end, p)) - return s; + while (--lowlen > 0) { + if (*p != *t) break; + p++; *t++; + } + if (lowlen == 0) { + if (str_lower_case_match(enc, t, target_end, s + len)) + return s; + } } - s = regex_get_prev_char_head(code, adjust_text, s); + + s = onigenc_get_prev_char_head(enc, adjust_text, s); } return (UChar* )NULL; @@ -2232,7 +2551,7 @@ 
bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, tail = target_end - 1; s = text; while ((s - text) < target_end - target) { - s += mblen(reg->enc, *s); + s += enc_len(reg->enc, *s); } s--; /* set to text check tail position. */ @@ -2249,7 +2568,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, p++; t = p; while ((p - t) < skip) { - p += mblen(reg->enc, *p); + p += enc_len(reg->enc, *p); } s += (p - t); } @@ -2267,7 +2586,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, p++; t = p; while ((p - t) < skip) { - p += mblen(reg->enc, *p); + p += enc_len(reg->enc, *p); } s += (p - t); } @@ -2314,23 +2633,26 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end, } static int -set_bm_backward_skip(UChar* s, UChar* end, RegCharEncoding enc, +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int ignore_case, int** skip) { int i, len; + UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * REG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return REGERR_MEMORY; + *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*skip)) return ONIGERR_MEMORY; } len = end - s; - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*skip)[i] = len; if (ignore_case) { - for (i = len - 1; i > 0; i--) - (*skip)[TOLOWER(enc, s[i])] = i; + for (i = len - 1; i > 0; i--) { + ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf); + (*skip)[*lowbuf] = i; + } } else { for (i = len - 1; i > 0; i--) @@ -2349,7 +2671,7 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text, if (text_start < s) s = text_start; else - s = regex_get_left_adjust_char_head(reg->enc, adjust_text, s); + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); while (s >= text) { p = s; @@ -2361,27 +2683,27 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text, return s; s -= reg->int_map_backward[*s]; - s = 
regex_get_left_adjust_char_head(reg->enc, adjust_text, s); + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); } return (UChar* )NULL; } static UChar* -map_search(RegCharEncoding code, UChar map[], UChar* text, UChar* text_range) +map_search(OnigEncoding enc, UChar map[], UChar* text, UChar* text_range) { UChar *s = text; while (s < text_range) { if (map[*s]) return s; - s += mblen(code, *s); + s += enc_len(enc, *s); } return (UChar* )NULL; } static UChar* -map_search_backward(RegCharEncoding code, UChar map[], +map_search_backward(OnigEncoding enc, UChar map[], UChar* text, UChar* adjust_text, UChar* text_start) { UChar *s = text_start; @@ -2389,14 +2711,14 @@ map_search_backward(RegCharEncoding code, UChar map[], while (s >= text) { if (map[*s]) return s; - s = regex_get_prev_char_head(code, adjust_text, s); + s = onigenc_get_prev_char_head(enc, adjust_text, s); } return (UChar* )NULL; } extern int -regex_match(regex_t* reg, UChar* str, UChar* end, UChar* at, RegRegion* region, - RegOptionType option) +onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, + OnigOptionType option) { int r; UChar *prev; @@ -2404,13 +2726,18 @@ regex_match(regex_t* reg, UChar* str, UChar* end, UChar* at, RegRegion* region, MATCH_ARG_INIT(msa, option, region, at); - if (region && !IS_POSIX_REGION(option)) - r = regex_region_resize(region, reg->num_mem + 1); + if (region +#ifdef USE_POSIX_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + r = onig_region_resize(region, reg->num_mem + 1); + } else r = 0; if (r == 0) { - prev = regex_get_prev_char_head(reg->enc, str, at); + prev = onigenc_get_prev_char_head(reg->enc, str, at); r = match_at(reg, str, end, at, prev, &msa); } MATCH_ARG_FREE(msa); @@ -2423,40 +2750,40 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, { UChar *p, *pprev = (UChar* )NULL; -#ifdef REG_DEBUG_SEARCH +#ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: 
%d\n", (int )str, (int )end, (int )s, (int )range); #endif p = s; if (reg->dmin > 0) { - if (IS_SINGLEBYTE_CODE(reg->enc)) { + if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { p += reg->dmin; } else { UChar *q = p + reg->dmin; - while (p < q) p += mblen(reg->enc, *p); + while (p < q) p += enc_len(reg->enc, *p); } } retry: switch (reg->optimize) { - case REG_OPTIMIZE_EXACT: + case ONIG_OPTIMIZE_EXACT: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; - case REG_OPTIMIZE_EXACT_IC: + case ONIG_OPTIMIZE_EXACT_IC: p = slow_search_ic(reg->enc, reg->exact, reg->exact_end, p, end, range); break; - case REG_OPTIMIZE_EXACT_BM: + case ONIG_OPTIMIZE_EXACT_BM: p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); break; - case REG_OPTIMIZE_EXACT_BM_NOT_REV: + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); break; - case REG_OPTIMIZE_MAP: + case ONIG_OPTIMIZE_MAP: p = map_search(reg->enc, reg->map, p, range); break; } @@ -2465,7 +2792,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, if (p - reg->dmin < s) { retry_gate: pprev = p; - p += mblen(reg->enc, *p); + p += enc_len(reg->enc, *p); goto retry; } @@ -2475,19 +2802,21 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { - prev = regex_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); - if (!IS_NEWLINE(*prev)) + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (!ONIG_IS_NEWLINE(*prev)) goto retry_gate; } break; case ANCHOR_END_LINE: if (ON_STR_END(p)) { - prev = regex_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); - if (prev && IS_NEWLINE(*prev)) + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? 
pprev : str), p); + if (prev && ONIG_IS_NEWLINE(*prev)) goto retry_gate; } - else if (!IS_NEWLINE(*p)) + else if (!ONIG_IS_NEWLINE(*p)) goto retry_gate; break; } @@ -2497,33 +2826,33 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, *low = p; if (low_prev) { if (*low > s) - *low_prev = regex_get_prev_char_head(reg->enc, s, p); + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); else - *low_prev = regex_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); } } else { - if (reg->dmax != INFINITE_DISTANCE) { + if (reg->dmax != ONIG_INFINITE_DISTANCE) { *low = p - reg->dmax; if (*low > s) { - *low = get_right_adjust_char_head_with_prev(reg->enc, s, - *low, low_prev); + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, low_prev); if (low_prev && IS_NULL(*low_prev)) - *low_prev = regex_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low); + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low); } else { if (low_prev) - *low_prev = regex_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low); + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? 
pprev : str), *low); } } } /* no needs to adjust *high, *high is used as range check only */ *high = p - reg->dmin; -#ifdef REG_DEBUG_SEARCH +#ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); @@ -2534,7 +2863,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, return 0; /* fail */ } -static int set_bm_backward_skip P_((UChar* s, UChar* end, RegCharEncoding enc, +static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, int ignore_case, int** skip)); #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 @@ -2551,19 +2880,19 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, retry: switch (reg->optimize) { - case REG_OPTIMIZE_EXACT: + case ONIG_OPTIMIZE_EXACT: exact_method: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case REG_OPTIMIZE_EXACT_IC: + case ONIG_OPTIMIZE_EXACT_IC: p = slow_search_backward_ic(reg->enc, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case REG_OPTIMIZE_EXACT_BM: - case REG_OPTIMIZE_EXACT_BM_NOT_REV: + case ONIG_OPTIMIZE_EXACT_BM: + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: if (IS_NULL(reg->int_map_backward)) { if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) goto exact_method; @@ -2576,7 +2905,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, end, p); break; - case REG_OPTIMIZE_MAP: + case ONIG_OPTIMIZE_MAP: p = map_search_backward(reg->enc, reg->map, range, adjrange, p); break; } @@ -2588,8 +2917,8 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { - prev = regex_get_prev_char_head(reg->enc, adjrange, p); - if (!IS_NEWLINE(*prev)) { + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (!ONIG_IS_NEWLINE(*prev)) { p = prev; goto retry; } @@ -2598,15 +2927,15 @@ 
backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, case ANCHOR_END_LINE: if (ON_STR_END(p)) { - prev = regex_get_prev_char_head(reg->enc, adjrange, p); + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(prev)) goto fail; - if (IS_NEWLINE(*prev)) { + if (ONIG_IS_NEWLINE(*prev)) { p = prev; goto retry; } } - else if (!IS_NEWLINE(*p)) { - p = regex_get_prev_char_head(reg->enc, adjrange, p); + else if (!ONIG_IS_NEWLINE(*p)) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(p)) goto fail; goto retry; } @@ -2615,13 +2944,13 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, } /* no needs to adjust *high, *high is used as range check only */ - if (reg->dmax != INFINITE_DISTANCE) { + if (reg->dmax != ONIG_INFINITE_DISTANCE) { *low = p - reg->dmax; *high = p - reg->dmin; - *high = regex_get_right_adjust_char_head(reg->enc, adjrange, *high); + *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high); } -#ifdef REG_DEBUG_SEARCH +#ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "backward_search_range: low: %d, high: %d\n", (int )(*low - str), (int )(*high - str)); #endif @@ -2629,7 +2958,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, } fail: -#ifdef REG_DEBUG_SEARCH +#ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "backward_search_range: fail.\n"); #endif return 0; /* fail */ @@ -2637,37 +2966,41 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, extern int -regex_search(regex_t* reg, UChar* str, UChar* end, - UChar* start, UChar* range, RegRegion* region, RegOptionType option) +onig_search(regex_t* reg, UChar* str, UChar* end, + UChar* start, UChar* range, OnigRegion* region, OnigOptionType option) { int r; UChar *s, *prev; MatchArg msa; - if (REG_STATE(reg) == REG_STATE_NORMAL) { + if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { reg->state++; /* increment as search counter */ if (IS_NOT_NULL(reg->chain)) { - regex_chain_reduce(reg); + 
onig_chain_reduce(reg); reg->state++; } } else { int n = 0; - while (REG_STATE(reg) < REG_STATE_NORMAL) { + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { if (++n > THREAD_PASS_LIMIT_COUNT) - return REGERR_OVER_THREAD_PASS_LIMIT_COUNT; + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } reg->state++; /* increment as search counter */ } -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, "regex_search (entry point): str: %d, end: %d, start: %d, range: %d\n", +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); #endif - if (region && !IS_POSIX_REGION(option)) { - r = regex_region_resize(region, reg->num_mem + 1); + if (region +#ifdef USE_POSIX_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + r = onig_region_resize(region, reg->num_mem + 1); if (r) goto finish_no_msa; } @@ -2675,7 +3008,7 @@ regex_search(regex_t* reg, UChar* str, UChar* end, #define MATCH_AND_RETURN_CHECK \ r = match_at(reg, str, end, s, prev, &msa);\ - if (r != REG_MISMATCH) {\ + if (r != ONIG_MISMATCH) {\ if (r >= 0) goto match;\ goto finish; /* error */ \ } @@ -2718,9 +3051,9 @@ regex_search(regex_t* reg, UChar* str, UChar* end, if (semi_end - start > reg->anchor_dmax) { start = semi_end - reg->anchor_dmax; if (start < end) - start = regex_get_right_adjust_char_head(reg->enc, str, start); + start = onigenc_get_right_adjust_char_head(reg->enc, str, start); else { /* match with empty at end */ - start = regex_get_prev_char_head(reg->enc, str, end); + start = onigenc_get_prev_char_head(reg->enc, str, end); } } if (semi_end - (range - 1) < reg->anchor_dmin) { @@ -2735,13 +3068,13 @@ regex_search(regex_t* reg, UChar* str, UChar* end, } if (semi_end - start < reg->anchor_dmin) { start = semi_end - reg->anchor_dmin; - start = regex_get_left_adjust_char_head(reg->enc, str, start); + start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); if (range > start) 
goto mismatch_no_msa; } } } else if (reg->anchor & ANCHOR_SEMI_END_BUF) { - if (IS_NEWLINE(end[-1])) { + if (ONIG_IS_NEWLINE(end[-1])) { semi_end = end - 1; if (semi_end > str && start <= semi_end) { goto end_buf; @@ -2759,8 +3092,8 @@ regex_search(regex_t* reg, UChar* str, UChar* end, else if (str == end) { /* empty string */ static UChar* address_for_empty_string = ""; -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, "regex_search: empty string.\n"); +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search: empty string.\n"); #endif if (reg->threshold_len == 0) { @@ -2774,8 +3107,8 @@ regex_search(regex_t* reg, UChar* str, UChar* end, goto mismatch_no_msa; } -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, "regex_search(apply anchor): end: %d, start: %d, range: %d\n", +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", (int )(end - str), (int )(start - str), (int )(range - str)); #endif @@ -2784,23 +3117,23 @@ regex_search(regex_t* reg, UChar* str, UChar* end, s = start; if (range > start) { /* forward search */ if (s > str) - prev = regex_get_prev_char_head(reg->enc, str, s); + prev = onigenc_get_prev_char_head(reg->enc, str, s); else prev = (UChar* )NULL; - if (reg->optimize != REG_OPTIMIZE_NONE) { + if (reg->optimize != ONIG_OPTIMIZE_NONE) { UChar *sch_range, *low, *high, *low_prev; sch_range = range; if (reg->dmax != 0) { - if (reg->dmax == INFINITE_DISTANCE) + if (reg->dmax == ONIG_INFINITE_DISTANCE) sch_range = end; else { sch_range += reg->dmax; if (sch_range > end) sch_range = end; } } - if (reg->dmax != INFINITE_DISTANCE && + if (reg->dmax != ONIG_INFINITE_DISTANCE && (end - start) >= reg->threshold_len) { do { if (! 
forward_search_range(reg, str, end, s, sch_range, @@ -2812,13 +3145,13 @@ regex_search(regex_t* reg, UChar* str, UChar* end, while (s <= high) { MATCH_AND_RETURN_CHECK; prev = s; - s += mblen(reg->enc, *s); + s += enc_len(reg->enc, *s); } if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { if (IS_NOT_NULL(prev)) { - while (!IS_NEWLINE(*prev) && s < range) { + while (!ONIG_IS_NEWLINE(*prev) && s < range) { prev = s; - s += mblen(reg->enc, *s); + s += enc_len(reg->enc, *s); } } } @@ -2835,15 +3168,15 @@ regex_search(regex_t* reg, UChar* str, UChar* end, do { MATCH_AND_RETURN_CHECK; prev = s; - s += mblen(reg->enc, *s); + s += enc_len(reg->enc, *s); } while (s <= range); /* exec s == range, because empty match with /$/. */ } else { /* backward search */ - if (reg->optimize != REG_OPTIMIZE_NONE) { + if (reg->optimize != ONIG_OPTIMIZE_NONE) { UChar *low, *high, *adjrange, *sch_start; - adjrange = regex_get_left_adjust_char_head(reg->enc, str, range); - if (reg->dmax != INFINITE_DISTANCE && + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); + if (reg->dmax != ONIG_INFINITE_DISTANCE && (end - range) >= reg->threshold_len) { do { sch_start = s + reg->dmax; @@ -2856,7 +3189,7 @@ regex_search(regex_t* reg, UChar* str, UChar* end, s = high; while (s >= low) { - prev = regex_get_prev_char_head(reg->enc, str, s); + prev = onigenc_get_prev_char_head(reg->enc, str, s); MATCH_AND_RETURN_CHECK; s = prev; } @@ -2868,14 +3201,14 @@ regex_search(regex_t* reg, UChar* str, UChar* end, sch_start = s; if (reg->dmax != 0) { - if (reg->dmax == INFINITE_DISTANCE) + if (reg->dmax == ONIG_INFINITE_DISTANCE) sch_start = end; else { sch_start += reg->dmax; if (sch_start > end) sch_start = end; else - sch_start = regex_get_left_adjust_char_head(reg->enc, start, - sch_start); + sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, + start, sch_start); } } if (backward_search_range(reg, str, end, sch_start, range, adjrange, @@ -2884,14 +3217,14 @@ regex_search(regex_t* reg, UChar* str, 
UChar* end, } do { - prev = regex_get_prev_char_head(reg->enc, str, s); + prev = onigenc_get_prev_char_head(reg->enc, str, s); MATCH_AND_RETURN_CHECK; s = prev; } while (s >= range); } mismatch: - r = REG_MISMATCH; + r = ONIG_MISMATCH; finish: MATCH_ARG_FREE(msa); @@ -2899,22 +3232,27 @@ regex_search(regex_t* reg, UChar* str, UChar* end, /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). */ - if (IS_FIND_NOT_EMPTY(reg->options) && region && !IS_POSIX_REGION(option)) - regex_region_clear(region); + if (IS_FIND_NOT_EMPTY(reg->options) && region +#ifdef USE_POSIX_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + onig_region_clear(region); + } -#ifdef REG_DEBUG - if (r != REG_MISMATCH) - fprintf(stderr, "regex_search: error %d\n", r); +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); #endif return r; mismatch_no_msa: - r = REG_MISMATCH; + r = ONIG_MISMATCH; finish_no_msa: reg->state--; /* decrement as search counter */ -#ifdef REG_DEBUG - if (r != REG_MISMATCH) - fprintf(stderr, "regex_search: error %d\n", r); +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); #endif return r; @@ -2924,8 +3262,26 @@ regex_search(regex_t* reg, UChar* str, UChar* end, return s - str; } +extern OnigEncoding +onig_get_encoding(regex_t* reg) +{ + return reg->enc; +} + +extern OnigOptionType +onig_get_options(regex_t* reg) +{ + return reg->options; +} + +extern OnigSyntaxType* +onig_get_syntax(regex_t* reg) +{ + return reg->syntax; +} + extern const char* -regex_version(void) +onig_version(void) { #define MSTR(a) # a diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c index 7b95e26f76..9c6a2161c2 100644 --- a/ext/mbstring/oniguruma/reggnu.c +++ b/ext/mbstring/oniguruma/reggnu.c @@ -2,13 +2,17 @@ reggnu.c - Oniguruma (regular expression library) - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 
2002-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #include "regint.h" +#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */ +#include "oniggnu.h" +#endif + #if defined(RUBY_PLATFORM) || defined(RUBY) -#ifndef REG_RUBY_M17N +#ifndef ONIG_RUBY_M17N #define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY #endif #endif @@ -18,25 +22,25 @@ #endif extern void -re_free_registers(RegRegion* r) +re_free_registers(OnigRegion* r) { /* 0: don't free self */ - regex_region_free(r, 0); + onig_region_free(r, 0); } extern int re_adjust_startpos(regex_t* reg, const char* string, int size, int startpos, int range) { - if (startpos > 0 && mbmaxlen(reg->enc) != 1 && startpos < size) { + if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) { UChar *p; UChar *s = (UChar* )string + startpos; if (range > 0) { - p = regex_get_right_adjust_char_head(reg->enc, (UChar* )string, s); + p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s); } else { - p = regex_get_left_adjust_char_head(reg->enc, (UChar* )string, s); + p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s); } return p - (UChar* )string; } @@ -48,29 +52,30 @@ extern int re_match(regex_t* reg, const char* str, int size, int pos, struct re_registers* regs) { - return regex_match(reg, (UChar* )str, (UChar* )(str + size), - (UChar* )(str + pos), regs, REG_OPTION_NONE); + return onig_match(reg, (UChar* )str, (UChar* )(str + size), + (UChar* )(str + pos), regs, ONIG_OPTION_NONE); } extern int re_search(regex_t* bufp, const char* string, int size, int startpos, int range, struct re_registers* regs) { - return regex_search(bufp, (UChar* )string, (UChar* )(string + size), - (UChar* )(string + startpos), - (UChar* )(string + startpos + range), regs, REG_OPTION_NONE); + return onig_search(bufp, (UChar* )string, (UChar* )(string + size), + (UChar* )(string + startpos), + (UChar* )(string + startpos + range), + regs, 
ONIG_OPTION_NONE); } extern int re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) { int r; - RegErrorInfo einfo; + OnigErrorInfo einfo; - r = regex_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); + r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); if (r != 0) { if (IS_NOT_NULL(ebuf)) - (void )regex_error_code_to_str((UChar* )ebuf, r, &einfo); + (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); } return r; @@ -80,19 +85,19 @@ extern int re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) { int r; - RegErrorInfo einfo; - RegCharEncoding enc; + OnigErrorInfo einfo; + OnigEncoding enc; /* I think encoding and options should be arguments of this function. But this is adapted to present re.c. (2002/11/29) */ - enc = RegDefaultCharEncoding; + enc = OnigEncDefaultCharEncoding; - r = regex_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), - reg->options, enc, RegDefaultSyntax, &einfo); + r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), + reg->options, enc, OnigDefaultSyntax, &einfo); if (r != 0) { if (IS_NOT_NULL(ebuf)) - (void )regex_error_code_to_str((UChar* )ebuf, r, &einfo); + (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); } return r; } @@ -100,23 +105,20 @@ re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) extern void re_free_pattern(regex_t* reg) { - regex_free(reg); + onig_free(reg); } extern int re_alloc_pattern(regex_t** reg) { - if (RegDefaultCharEncoding == REGCODE_UNDEF) - return REGERR_DEFAULT_ENCODING_IS_NOT_SETTED; - - return regex_alloc_init(reg, REG_OPTION_DEFAULT, RegDefaultCharEncoding, - RegDefaultSyntax); + return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding, + OnigDefaultSyntax); } extern void re_set_casetable(const char* table) { - regex_set_default_trans_table((UChar* )table); + onigenc_set_default_caseconv_table((UChar* )table); } #ifdef 
USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY @@ -167,7 +169,7 @@ static const unsigned char mbctab_sjis[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -200,16 +202,39 @@ const unsigned char *re_mbctab = mbctab_ascii; #endif extern void -#ifdef REG_RUBY_M17N -re_mbcinit(RegCharEncoding enc) +#ifdef ONIG_RUBY_M17N +re_mbcinit(OnigEncoding enc) #else re_mbcinit(int mb_code) #endif { -#ifdef REG_RUBY_M17N - RegDefaultCharEncoding = enc; +#ifdef ONIG_RUBY_M17N + + onigenc_set_default_encoding(enc); + #else - RegDefaultCharEncoding = REG_MBLEN_TABLE[mb_code]; + + OnigEncoding enc; + + switch (mb_code) { + case MBCTYPE_ASCII: + enc = ONIG_ENCODING_ASCII; + break; + case MBCTYPE_EUC: + enc = ONIG_ENCODING_EUC_JP; + break; + case MBCTYPE_SJIS: + enc = ONIG_ENCODING_SJIS; + break; + case MBCTYPE_UTF8: + enc = ONIG_ENCODING_UTF8; + break; + default: + return ; + break; + } + + onigenc_set_default_encoding(enc); #endif #ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h index d646dd11f0..c01a73c546 100644 --- a/ext/mbstring/oniguruma/regint.h +++ b/ext/mbstring/oniguruma/regint.h @@ -2,56 +2,62 @@ regint.h - Oniguruma (regular expression library) - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #ifndef REGINT_H #define REGINT_H /* for debug */ -/* #define REG_DEBUG_PARSE_TREE */ -/* #define REG_DEBUG_COMPILE */ -/* #define REG_DEBUG_SEARCH */ -/* #define REG_DEBUG_MATCH */ -/* #define REG_DONT_OPTIMIZE */ +/* #define 
ONIG_DEBUG_PARSE_TREE */ +/* #define ONIG_DEBUG_COMPILE */ +/* #define ONIG_DEBUG_SEARCH */ +/* #define ONIG_DEBUG_MATCH */ +/* #define ONIG_DONT_OPTIMIZE */ /* for byte-code statistical data. */ -/* #define REG_DEBUG_STATISTICS */ +/* #define ONIG_DEBUG_STATISTICS */ -#if defined(REG_DEBUG_PARSE_TREE) || defined(REG_DEBUG_MATCH) || \ - defined(REG_DEBUG_COMPILE) || defined(REG_DEBUG_STATISTICS) -#ifndef REG_DEBUG -#define REG_DEBUG +#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ + defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS) +#ifndef ONIG_DEBUG +#define ONIG_DEBUG #endif #endif #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ (defined(__ppc__) && defined(__APPLE__)) || \ + defined(__x86_64) || defined(__x86_64__) || \ defined(__mc68020__) -#define UNALIGNED_WORD_ACCESS +#define PLATFORM_UNALIGNED_WORD_ACCESS #endif /* config */ -#define USE_NAMED_SUBEXP +/* spec. config */ +#define USE_NAMED_GROUP #define USE_SUBEXP_CALL +#define USE_FOLD_MATCH /* ess-tsett etc... 
*/ +#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ +#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ +#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +/* internal config */ +#define USE_RECYCLE_NODE #define USE_OP_PUSH_OR_JUMP_EXACT #define USE_QUALIFIER_PEEK_NEXT -#define USE_RECYCLE_NODE -#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ -/* #define USE_SBMB_CLASS */ #define INIT_MATCH_STACK_SIZE 160 -#define MATCH_STACK_LIMIT_SIZE 200000 +#define MATCH_STACK_LIMIT_SIZE 500000 /* interface to external system */ #ifdef NOT_RUBY /* gived from Makefile */ #include "config.h" +#define USE_VARIABLE_META_CHARS #define USE_VARIABLE_SYNTAX #define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ -#define DEFAULT_TRANSTABLE_EXIST 1 -#define THREAD_ATOMIC_START /* depend on thread system */ -#define THREAD_ATOMIC_END /* depend on thread system */ -#define THREAD_PASS /* depend on thread system */ +#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ +#define THREAD_ATOMIC_START /* depend on thread system */ +#define THREAD_ATOMIC_END /* depend on thread system */ +#define THREAD_PASS /* depend on thread system */ #define xmalloc malloc #define xrealloc realloc #define xfree free @@ -59,12 +65,11 @@ #include "ruby.h" #include "version.h" #include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */ -#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR -#define THREAD_ATOMIC_START DEFER_INTS -#define THREAD_ATOMIC_END ENABLE_INTS -#define THREAD_PASS /* I want to use rb_thread_pass() */ -#define WARNING rb_warn -#define VERB_WARNING rb_warning +#define THREAD_ATOMIC_START DEFER_INTS +#define THREAD_ATOMIC_END ENABLE_INTS +#define THREAD_PASS rb_thread_schedule() +#define DEFAULT_WARN_FUNCTION rb_warn +#define DEFAULT_VERB_WARN_FUNCTION rb_warning #if defined(RUBY_VERSION_MAJOR) #if RUBY_VERSION_MAJOR > 1 || \ @@ -74,6 +79,8 @@ #endif #endif +#define 
ONIG_RUBY_DEFINE_GLOBAL_FUNCTION(s,f,n) \ + rb_define_global_function(s, f, n) #endif /* else NOT_RUBY */ #define THREAD_PASS_LIMIT_COUNT 10 @@ -82,7 +89,9 @@ #define xmemmove memmove #if defined(_WIN32) && !defined(__CYGWIN__) #define xalloca _alloca +#ifdef NOT_RUBY #define vsnprintf _vsnprintf +#endif #else #define xalloca alloca #endif @@ -104,15 +113,12 @@ #include #include -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG # include #endif -#ifdef NOT_RUBY -# include "oniguruma.h" -#else -# include "regex.h" -#endif +#include "regenc.h" +#include "oniguruma.h" #ifdef MIN #undef MIN @@ -123,17 +129,24 @@ #define MIN(a,b) (((a)>(b))?(b):(a)) #define MAX(a,b) (((a)<(b))?(b):(a)) -#ifndef UNALIGNED_WORD_ACCESS +#define IS_NULL(p) (((void*)(p)) == (void*)0) +#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL +#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val) +#define NULL_UCHARP ((UChar* )0) + +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS #define WORD_ALIGNMENT_SIZE SIZEOF_INT #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ - (pad_size) = WORD_ALIGNMENT_SIZE - ((int )(addr) % WORD_ALIGNMENT_SIZE);\ + (pad_size) = WORD_ALIGNMENT_SIZE \ + - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ } while (0) #define ALIGNMENT_RIGHT(addr) do {\ (addr) += (WORD_ALIGNMENT_SIZE - 1);\ - (addr) -= ((int )(addr) % WORD_ALIGNMENT_SIZE);\ + (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ } while (0) @@ -216,7 +229,7 @@ #define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p) #endif -#endif /* UNALIGNED_WORD_ACCESS */ +#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ /* stack pop level */ #define STACK_POP_LEVEL_FREE 0 @@ -224,12 +237,12 @@ #define STACK_POP_LEVEL_ALL 2 /* optimize flags */ -#define REG_OPTIMIZE_NONE 0 -#define REG_OPTIMIZE_EXACT 1 /* Slow Search */ -#define REG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ -#define REG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM 
(but not simple match) */ -#define REG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ -#define REG_OPTIMIZE_MAP 5 /* char map */ +#define ONIG_OPTIMIZE_NONE 0 +#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ +#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ +#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ +#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ +#define ONIG_OPTIMIZE_MAP 5 /* char map */ /* bit status */ typedef unsigned int BitStatusType; @@ -255,71 +268,32 @@ typedef unsigned int BitStatusType; #define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) -typedef unsigned int WCINT; - -#define SIZE_WCINT sizeof(WCINT) -#define GET_WCINT(wc,p) (wc) = *((WCINT* )(p)) - -#define INFINITE_DISTANCE ~((RegDistance )0) - -#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) -# define IS_ASCII(c) 1 -#else -# define IS_ASCII(c) isascii(c) -#endif - -#ifdef isblank -# define IS_BLANK(c) (IS_ASCII(c) && isblank(c)) -#else -# define IS_BLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -# define IS_GRAPH(c) (IS_ASCII(c) && isgraph(c)) -#else -# define IS_GRAPH(c) (IS_ASCII(c) && isprint(c) && !isspace(c)) -#endif - -#define IS_PRINT(c) (isprint(c) && IS_ASCII(c)) -#define IS_ALNUM(c) (isalnum(c) && IS_ASCII(c)) -#define IS_ALPHA(c) (isalpha(c) && IS_ASCII(c)) -#define IS_LOWER(c) (islower(c) && IS_ASCII(c)) -#define IS_UPPER(c) (isupper(c) && IS_ASCII(c)) -#define IS_CNTRL(c) (iscntrl(c) && IS_ASCII(c)) -#define IS_PUNCT(c) (ispunct(c) && IS_ASCII(c)) -#define IS_SPACE(c) (isspace(c) && IS_ASCII(c)) -#define IS_DIGIT(c) (isdigit(c) && IS_ASCII(c)) -#define IS_XDIGIT(c) (isxdigit(c) && IS_ASCII(c)) -#define IS_ODIGIT(c) (IS_DIGIT(c) && (c) < '8') - -#define DIGITVAL(c) ((c) - '0') -#define ODIGITVAL(c) DIGITVAL(c) -#define XDIGITVAL(c) \ - (IS_DIGIT(c) ? DIGITVAL(c) : (IS_UPPER(c) ? 
(c) - 'A' + 10 : (c) - 'a' + 10)) - -#define IS_SINGLELINE(option) ((option) & REG_OPTION_SINGLELINE) -#define IS_MULTILINE(option) ((option) & REG_OPTION_MULTILINE) -#define IS_IGNORECASE(option) ((option) & REG_OPTION_IGNORECASE) -#define IS_EXTEND(option) ((option) & REG_OPTION_EXTEND) -#define IS_FIND_LONGEST(option) ((option) & REG_OPTION_FIND_LONGEST) -#define IS_FIND_NOT_EMPTY(option) ((option) & REG_OPTION_FIND_NOT_EMPTY) +#define DIGITVAL(code) ((code) - '0') +#define ODIGITVAL(code) DIGITVAL(code) +#define XDIGITVAL(enc,code) \ + (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ + : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) + +#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) +#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) +#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) +#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) +#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) +#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) #define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option)) #define IS_FIND_CONDITION(option) ((option) & \ - (REG_OPTION_FIND_LONGEST | REG_OPTION_FIND_NOT_EMPTY)) -#define IS_NOTBOL(option) ((option) & REG_OPTION_NOTBOL) -#define IS_NOTEOL(option) ((option) & REG_OPTION_NOTEOL) -#define IS_POSIX_REGION(option) ((option) & REG_OPTION_POSIX_REGION) + (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) +#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) +#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) +#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) -#ifdef NEWLINE -#undef NEWLINE -#endif -#define NEWLINE '\n' -#define IS_NULL(p) (((void*)(p)) == (void*)0) -#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) -#define IS_NEWLINE(c) ((c) == NEWLINE) -#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL -#define CHECK_NULL_RETURN_VAL(p,val) if 
(IS_NULL(p)) return (val) +/* OP_SET_OPTION is required for these options. +#define IS_DYNAMIC_OPTION(option) \ + (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) +*/ +/* ignore-case and multibyte status are included in compiled code. */ +#define IS_DYNAMIC_OPTION(option) 0 -#define NULL_UCHARP ((UChar* )0) /* bitset */ #define BITS_PER_BYTE 8 @@ -327,7 +301,7 @@ typedef unsigned int WCINT; #define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE) #define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS typedef unsigned int Bits; #else typedef unsigned char Bits; @@ -357,18 +331,18 @@ typedef struct _BBuf { unsigned int alloc; } BBuf; -#define BBUF_INIT(buf,size) regex_bbuf_init((BBuf* )(buf), (size)) +#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) #define BBUF_SIZE_INC(buf,inc) do{\ (buf)->alloc += (inc);\ (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ - if (IS_NULL((buf)->p)) return(REGERR_MEMORY);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ } while (0) #define BBUF_EXPAND(buf,low) do{\ do { (buf)->alloc *= 2; } while ((buf)->alloc < low);\ (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ - if (IS_NULL((buf)->p)) return(REGERR_MEMORY);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ } while (0) #define BBUF_ENSURE_SIZE(buf,size) do{\ @@ -376,7 +350,7 @@ typedef struct _BBuf { while (new_alloc < (size)) { new_alloc *= 2; }\ if ((buf)->alloc != new_alloc) {\ (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\ - if (IS_NULL((buf)->p)) return(REGERR_MEMORY);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ (buf)->alloc = new_alloc;\ }\ } while (0) @@ -430,112 +404,6 @@ typedef struct _BBuf { #define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] -extern UChar* DefaultTransTable; -#define TOLOWER(enc,c) (DefaultTransTable[c]) - -/* methods for support multi-byte code, */ -#define ismb(code,c) (mblen((code),(c)) != 1) -#define MB2WC(p,end,code) 
mb2wc((p),(end),(code)) -#define MBBACK(code,start,s,n) step_backward_char((code),(start),(s),(n)) - -#ifdef REG_RUBY_M17N - -#define MB2WC_AVAILABLE(enc) 1 -#define WC2MB_FIRST(enc, wc) m17n_firstbyte((enc),(wc)) - -#define mbmaxlen(enc) m17n_mbmaxlen(enc) -#define mblen(enc,c) m17n_mbclen(enc,c) -#define mbmaxlen_dist(enc) \ - (mbmaxlen(enc) > 0 ? mbmaxlen(enc) : INFINITE_DISTANCE) - -#define IS_SINGLEBYTE_CODE(enc) (m17n_mbmaxlen(enc) == 1) -/* #define IS_INDEPENDENT_TRAIL(enc) m17n_independent_trail(enc) */ -#define IS_INDEPENDENT_TRAIL(enc) IS_SINGLEBYTE_CODE(enc) - -#define IS_CODE_ASCII(enc,c) IS_ASCII(c) -#define IS_CODE_GRAPH(enc,c) IS_GRAPH(c) -#define IS_CODE_PRINT(enc,c) m17n_isprint(enc,c) -#define IS_CODE_ALNUM(enc,c) m17n_isalnum(enc,c) -#define IS_CODE_ALPHA(enc,c) m17n_isalpha(enc,c) -#define IS_CODE_LOWER(enc,c) m17n_islower(enc,c) -#define IS_CODE_UPPER(enc,c) m17n_isupper(enc,c) -#define IS_CODE_CNTRL(enc,c) m17n_iscntrl(enc,c) -#define IS_CODE_PUNCT(enc,c) m17n_ispunct(enc,c) -#define IS_CODE_SPACE(enc,c) m17n_isspace(enc,c) -#define IS_CODE_BLANK(enc,c) IS_BLANK(c) -#define IS_CODE_DIGIT(enc,c) m17n_isdigit(enc,c) -#define IS_CODE_XDIGIT(enc,c) m17n_isxdigit(enc,c) - -#define IS_CODE_WORD(enc,c) m17n_iswchar(enc,c) -#define ISNOT_CODE_WORD(enc,c) (!m17n_iswchar(enc,c)) - -#define IS_WORD_STR(code,s,end) \ - (ismb((code),*(s)) ? (s + mblen((code),*(s)) <= (end)) : \ - m17n_iswchar(code,*(s))) -#define IS_WORD_STR_INC(code,s,end) \ - (ismb((code),*(s)) ? ((s) += mblen((code),*(s)), (s) <= (end)) : \ - (s++, m17n_iswchar(code,s[-1]))) - -#define IS_WORD_HEAD(enc,c) (ismb(enc,c) ? 
1 : IS_CODE_WORD(enc,c)) - -#define IS_SB_WORD(code,c) (mblen(code,c) == 1 && IS_CODE_WORD(code,c)) -#define IS_MB_WORD(code,c) ismb(code,c) - -#define mb2wc(p,e,enc) m17n_codepoint((enc),(p),(e)) - -#else /* REG_RUBY_M17N */ - -#define mb2wc(p,e,code) regex_mb2wc((p),(e),(code)) - -#define MB2WC_AVAILABLE(code) 1 -#define WC2MB_FIRST(code, wc) regex_wc2mb_first(code, wc) - -#define mbmaxlen_dist(code) mbmaxlen(code) -#define mbmaxlen(code) regex_mb_max_length(code) -#define mblen(code,c) (code)[(int )(c)] - -#define IS_SINGLEBYTE_CODE(code) ((code) == REGCODE_ASCII) -#define IS_INDEPENDENT_TRAIL(code) \ - ((code) == REGCODE_ASCII || (code) == REGCODE_UTF8) - -#define IS_CODE_ASCII(code,c) IS_ASCII(c) -#define IS_CODE_GRAPH(code,c) IS_GRAPH(c) -#define IS_CODE_PRINT(code,c) IS_PRINT(c) -#define IS_CODE_ALNUM(code,c) IS_ALNUM(c) -#define IS_CODE_ALPHA(code,c) IS_ALPHA(c) -#define IS_CODE_LOWER(code,c) IS_LOWER(c) -#define IS_CODE_UPPER(code,c) IS_UPPER(c) -#define IS_CODE_CNTRL(code,c) IS_CNTRL(c) -#define IS_CODE_PUNCT(code,c) IS_PUNCT(c) -#define IS_CODE_SPACE(code,c) IS_SPACE(c) -#define IS_CODE_BLANK(code,c) IS_BLANK(c) -#define IS_CODE_DIGIT(code,c) IS_DIGIT(c) -#define IS_CODE_ODIGIT(code,c) IS_ODIGIT(c) -#define IS_CODE_XDIGIT(code,c) IS_XDIGIT(c) - -#define IS_SB_WORD(code,c) (IS_CODE_ALNUM(code,c) || (c) == '_') -#define IS_MB_WORD(code,c) ismb(code,c) - -#define IS_CODE_WORD(code,c) \ - (IS_SB_WORD(code,c) && ((c) < 0x80 || (code) == REGCODE_ASCII)) -#define ISNOT_CODE_WORD(code,c) \ - ((!IS_SB_WORD(code,c)) && !ismb(code,c)) - -#define IS_WORD_STR(code,s,end) \ - (ismb((code),*(s)) ? (s + mblen((code),*(s)) <= (end)) : \ - IS_SB_WORD(code,*(s))) -#define IS_WORD_STR_INC(code,s,end) \ - (ismb((code),*(s)) ? ((s) += mblen((code),*(s)), (s) <= (end)) : \ - (s++, IS_SB_WORD(code,s[-1]))) - -#define IS_WORD_HEAD(code,c) (ismb(code,c) ? 
1 : IS_SB_WORD(code,c)) - -extern int regex_mb_max_length P_((RegCharEncoding code)); -extern WCINT regex_mb2wc P_((UChar* p, UChar* end, RegCharEncoding code)); -extern int regex_wc2mb_first P_((RegCharEncoding code, WCINT wc)); - -#endif /* not REG_RUBY_M17N */ - #define ANCHOR_BEGIN_BUF (1<<0) #define ANCHOR_BEGIN_LINE (1<<1) @@ -571,7 +439,7 @@ enum OpCode { OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ OP_EXACTMB2N, /* mb-length = 2 */ - OP_EXACTMB3N, /* mb length = 3 */ + OP_EXACTMB3N, /* mb-length = 3 */ OP_EXACTMBN, /* other length */ OP_EXACT1_IC, /* single byte, N = 1, ignore case */ @@ -584,9 +452,12 @@ enum OpCode { OP_CCLASS_MB_NOT, OP_CCLASS_MIX_NOT, - OP_ANYCHAR, /* "." */ - OP_ANYCHAR_STAR, /* ".*" */ + OP_ANYCHAR, /* "." */ + OP_ANYCHAR_ML, /* "." multi-line */ + OP_ANYCHAR_STAR, /* ".*" */ + OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ OP_ANYCHAR_STAR_PEEK_NEXT, + OP_ANYCHAR_ML_STAR_PEEK_NEXT, OP_WORD, OP_NOT_WORD, @@ -608,7 +479,9 @@ enum OpCode { OP_BACKREF2, OP_BACKREF3, OP_BACKREFN, + OP_BACKREFN_IC, OP_BACKREF_MULTI, + OP_BACKREF_MULTI_IC, OP_MEMORY_START, OP_MEMORY_START_PUSH, /* push back-tracker to stack */ @@ -632,6 +505,8 @@ enum OpCode { OP_REPEAT_INC_NG, /* non greedy */ OP_NULL_CHECK_START, /* null loop checker start */ OP_NULL_CHECK_END, /* null loop checker end */ + OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ + OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ OP_PUSH_POS, /* (?=...) start */ OP_POP_POS, /* (?=...) 
end */ @@ -668,9 +543,10 @@ typedef int RepeatNumType; #define SIZE_LENGTH sizeof(LengthType) #define SIZE_MEMNUM sizeof(MemNumType) #define SIZE_REPEATNUM sizeof(RepeatNumType) -#define SIZE_OPTION sizeof(RegOptionType) +#define SIZE_OPTION sizeof(OnigOptionType) +#define SIZE_CODE_POINT sizeof(OnigCodePoint) -#ifdef UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS #define GET_RELADDR_INC(addr,p) do{\ addr = *((RelAddrType* )(p));\ (p) += SIZE_RELADDR;\ @@ -697,7 +573,7 @@ typedef int RepeatNumType; } while(0) #define GET_OPTION_INC(option,p) do{\ - option = *((RegOptionType* )(p));\ + option = *((OnigOptionType* )(p));\ (p) += SIZE_OPTION;\ } while(0) #else @@ -718,8 +594,10 @@ typedef int RepeatNumType; #define SERIALIZE_BUFSIZE SIZEOF_INT -#endif /* UNALIGNED_WORD_ACCESS */ +#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ +/* code point's address must be aligned address. */ +#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) #define GET_BYTE_INC(byte,p) do{\ byte = *(p);\ (p)++;\ @@ -760,31 +638,50 @@ typedef int RepeatNumType; #define SIZE_OP_RETURN SIZE_OPCODE -#ifdef REG_DEBUG +typedef struct { + OnigCodePoint esc; + OnigCodePoint anychar; + OnigCodePoint anytime; + OnigCodePoint zero_or_one_time; + OnigCodePoint one_or_more_time; + OnigCodePoint anychar_anytime; +} OnigMetaCharTableType; + +extern OnigMetaCharTableType OnigMetaCharTable; + +#define MC_ESC OnigMetaCharTable.esc +#define MC_ANYCHAR OnigMetaCharTable.anychar +#define MC_ANYTIME OnigMetaCharTable.anytime +#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time +#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time +#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime + + +#ifdef ONIG_DEBUG typedef struct { short int opcode; char* name; short int arg_type; -} RegOpInfoType; +} OnigOpInfoType; -extern RegOpInfoType RegOpInfo[]; +extern OnigOpInfoType OnigOpInfo[]; -extern void regex_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp)); 
+extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp)); -#ifdef REG_DEBUG_STATISTICS -extern void regex_statistics_init P_((void)); -extern void regex_print_statistics P_((FILE* f)); +#ifdef ONIG_DEBUG_STATISTICS +extern void onig_statistics_init P_((void)); +extern void onig_print_statistics P_((FILE* f)); #endif #endif -extern char* regex_error_code_to_format P_((int code)); -extern void regex_snprintf_with_pattern PV_((char buf[], int bufsize, RegCharEncoding enc, char* pat, char* pat_end, char *fmt, ...)); -extern UChar* regex_strdup P_((UChar* s, UChar* end)); -extern int regex_bbuf_init P_((BBuf* buf, int size)); -extern int regex_alloc_init P_((regex_t** reg, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax)); -extern int regex_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, RegErrorInfo* einfo)); -extern void regex_chain_reduce P_((regex_t* reg)); -extern int regex_is_in_wc_range P_((UChar* p, WCINT wc)); +extern char* onig_error_code_to_format P_((int code)); +extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...)); +extern UChar* onig_strdup P_((UChar* s, UChar* end)); +extern int onig_bbuf_init P_((BBuf* buf, int size)); +extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax)); +extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo)); +extern void onig_chain_reduce P_((regex_t* reg)); +extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code)); #endif /* REGINT_H */ diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c index 95a55b2a06..673432c00e 100644 --- a/ext/mbstring/oniguruma/regparse.c +++ b/ext/mbstring/oniguruma/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) - Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2003-2004 K.Kosako 
(kosako@sofnec.co.jp) **********************************************************************/ #include "regparse.h" @@ -10,126 +10,242 @@ #define WARN_BUFSIZE 256 #define SYN_POSIX_COMMON_OP \ - ( REG_SYN_OP_ANYCHAR | REG_SYN_OP_POSIX_BRACKET | REG_SYN_OP_BACK_REF | \ - REG_SYN_OP_CC | REG_SYN_OP_0INF | REG_SYN_OP_LINE_ANCHOR | \ - REG_SYN_OP_ESC_CONTROL_CHAR ) + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ + ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ + ONIG_SYN_OP_LINE_ANCHOR | \ + ONIG_SYN_OP_ESC_CONTROL_CHARS ) #define SYN_GNU_REGEX_OP \ - ( REG_SYN_OP_ANYCHAR | REG_SYN_OP_CC | \ - REG_SYN_OP_POSIX_BRACKET | REG_SYN_OP_BACK_REF | \ - REG_SYN_OP_INTERVAL | REG_SYN_OP_SUBEXP | REG_SYN_OP_ALT | \ - REG_SYN_OP_0INF | REG_SYN_OP_1INF | REG_SYN_OP_01 | \ - REG_SYN_OP_ESC_BUF_ANCHOR | REG_SYN_OP_ESC_WORD | \ - REG_SYN_OP_ESC_WORD_BOUND | REG_SYN_OP_ESC_WORD_BEGIN_END | \ - REG_SYN_OP_ESC_WHITE_SPACE | REG_SYN_OP_ESC_DIGIT | \ - REG_SYN_OP_LINE_ANCHOR ) + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ + ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ + ONIG_SYN_OP_VBAR_ALT | \ + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ + ONIG_SYN_OP_QMARK_ZERO_ONE | \ + ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ + ONIG_SYN_OP_ESC_W_WORD | \ + ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ + ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ + ONIG_SYN_OP_LINE_ANCHOR ) #define SYN_GNU_REGEX_BV \ - ( REG_SYN_CONTEXT_INDEP_ANCHORS | REG_SYN_CONTEXT_INDEP_OPS | \ - REG_SYN_CONTEXT_INVALID_OPS | REG_SYN_ALLOW_INVALID_INTERVAL | \ - REG_SYN_ESCAPE_IN_CC | REG_SYN_ALLOW_RANGE_OP_IN_CC ) + ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ + ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ + ONIG_SYN_BACKSLASH_ESCAPE_IN_CC 
| ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) #ifdef USE_VARIABLE_SYNTAX -RegSyntaxType RegSyntaxPosixBasic = { - ( SYN_POSIX_COMMON_OP | REG_SYN_OP_ESC_SUBEXP | REG_SYN_OP_ESC_INTERVAL ) +OnigSyntaxType OnigSyntaxPosixBasic = { + ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | + ONIG_SYN_OP_ESC_BRACE_INTERVAL ) , 0 , 0 - , ( REG_OPTION_SINGLELINE | REG_OPTION_MULTILINE ) + , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) }; -RegSyntaxType RegSyntaxPosixExtended = { - ( SYN_POSIX_COMMON_OP | REG_SYN_OP_SUBEXP | REG_SYN_OP_INTERVAL | - REG_SYN_OP_1INF | REG_SYN_OP_01 | REG_SYN_OP_ALT ) +OnigSyntaxType OnigSyntaxPosixExtended = { + ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP | + ONIG_SYN_OP_BRACE_INTERVAL | + ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT ) , 0 - , ( REG_SYN_CONTEXT_INDEP_ANCHORS | - REG_SYN_CONTEXT_INDEP_OPS | REG_SYN_CONTEXT_INVALID_OPS | - REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | REG_SYN_ALLOW_RANGE_OP_IN_CC ) - , ( REG_OPTION_SINGLELINE | REG_OPTION_MULTILINE ) + , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | + ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | + ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | + ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) }; -RegSyntaxType RegSyntaxEmacs = { - ( REG_SYN_OP_ANYCHAR | REG_SYN_OP_CC | REG_SYN_OP_ESC_INTERVAL | - REG_SYN_OP_ESC_SUBEXP | REG_SYN_OP_ESC_ALT | - REG_SYN_OP_0INF | REG_SYN_OP_1INF | REG_SYN_OP_01 | - REG_SYN_OP_BACK_REF | REG_SYN_OP_LINE_ANCHOR | - REG_SYN_OP_ESC_GNU_BUF_ANCHOR | REG_SYN_OP_ESC_CONTROL_CHAR ) - , 0 - , REG_SYN_ALLOW_EMPTY_RANGE_IN_CC - , REG_OPTION_NONE +OnigSyntaxType OnigSyntaxEmacs = { + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | + ONIG_SYN_OP_ESC_BRACE_INTERVAL | + ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | + ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF | + 
ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS ) + , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR + , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC + , ONIG_OPTION_NONE }; -RegSyntaxType RegSyntaxGrep = { - ( REG_SYN_OP_ANYCHAR | REG_SYN_OP_CC | REG_SYN_OP_POSIX_BRACKET | - REG_SYN_OP_INTERVAL | REG_SYN_OP_ESC_SUBEXP | REG_SYN_OP_ESC_ALT | - REG_SYN_OP_0INF | REG_SYN_OP_ESC_1INF | REG_SYN_OP_ESC_01 | - REG_SYN_OP_LINE_ANCHOR ) +OnigSyntaxType OnigSyntaxGrep = { + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | + ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | + ONIG_SYN_OP_ESC_VBAR_ALT | + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | + ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | + ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND | + ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF ) , 0 - , ( REG_SYN_ALLOW_EMPTY_RANGE_IN_CC | REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) - , REG_OPTION_NONE + , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) + , ONIG_OPTION_NONE }; -RegSyntaxType RegSyntaxGnuRegex = { +OnigSyntaxType OnigSyntaxGnuRegex = { SYN_GNU_REGEX_OP , 0 , SYN_GNU_REGEX_BV - , REG_OPTION_NONE + , ONIG_OPTION_NONE }; -RegSyntaxType RegSyntaxJava = { - (( SYN_GNU_REGEX_OP | REG_SYN_OP_NON_GREEDY | REG_SYN_OP_SUBEXP_EFFECT | - REG_SYN_OP_ESC_CONTROL_CHAR | REG_SYN_OP_ESC_C_CONTROL | - REG_SYN_OP_QUOTE | REG_SYN_OP_ESC_OCTAL3 | REG_SYN_OP_ESC_X_HEX2 ) - & ~REG_SYN_OP_ESC_WORD_BEGIN_END ) - , ( REG_SYN_OP2_OPTION_PERL | - REG_SYN_OP2_POSSESSIVE_REPEAT | REG_SYN_OP2_POSSESSIVE_INTERVAL | - REG_SYN_OP2_CCLASS_SET | REG_SYN_OP2_ESC_V_VTAB | - REG_SYN_OP2_ESC_U_HEX4 ) - , ( SYN_GNU_REGEX_BV | REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) - , REG_OPTION_SINGLELINE +OnigSyntaxType OnigSyntaxJava = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 ) + & 
~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | + ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | + ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY ) + , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) + , ONIG_OPTION_SINGLELINE }; -RegSyntaxType RegSyntaxPerl = { - (( SYN_GNU_REGEX_OP | REG_SYN_OP_NON_GREEDY | REG_SYN_OP_SUBEXP_EFFECT | - REG_SYN_OP_ESC_OCTAL3 | REG_SYN_OP_ESC_X_HEX2 | - REG_SYN_OP_ESC_X_BRACE_HEX8 | REG_SYN_OP_ESC_CONTROL_CHAR | - REG_SYN_OP_ESC_C_CONTROL | REG_SYN_OP_QUOTE ) - & ~REG_SYN_OP_ESC_WORD_BEGIN_END ) - , REG_SYN_OP2_OPTION_PERL +OnigSyntaxType OnigSyntaxPerl = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | + ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | + ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY ) , SYN_GNU_REGEX_BV - , REG_OPTION_SINGLELINE + , ONIG_OPTION_SINGLELINE }; #endif /* USE_VARIABLE_SYNTAX */ -RegSyntaxType RegSyntaxRuby = { - (( SYN_GNU_REGEX_OP | REG_SYN_OP_NON_GREEDY | REG_SYN_OP_SUBEXP_EFFECT | - REG_SYN_OP_ESC_OCTAL3 | REG_SYN_OP_ESC_X_HEX2 | - REG_SYN_OP_ESC_X_BRACE_HEX8 | REG_SYN_OP_ESC_CONTROL_CHAR | - REG_SYN_OP_ESC_C_CONTROL ) - & ~REG_SYN_OP_ESC_WORD_BEGIN_END ) - , ( REG_SYN_OP2_OPTION_RUBY | - REG_SYN_OP2_NAMED_SUBEXP | REG_SYN_OP2_SUBEXP_CALL | - REG_SYN_OP2_POSSESSIVE_REPEAT | REG_SYN_OP2_CCLASS_SET | - REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | - REG_SYN_OP2_ESC_M_BAR_META | REG_SYN_OP2_ESC_V_VTAB ) - , ( SYN_GNU_REGEX_BV | REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED | - REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) - , REG_OPTION_NONE +OnigSyntaxType OnigSyntaxRuby = { + (( 
SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_RUBY | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | + ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | + ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | + ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | + ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) + , ONIG_OPTION_NONE }; -RegSyntaxType* RegDefaultSyntax = REG_SYNTAX_RUBY; +OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; #ifdef USE_VARIABLE_SYNTAX extern int -regex_set_default_syntax(RegSyntaxType* syntax) +onig_set_default_syntax(OnigSyntaxType* syntax) { if (IS_NULL(syntax)) - syntax = REG_SYNTAX_RUBY; + syntax = ONIG_SYNTAX_RUBY; + + OnigDefaultSyntax = syntax; + return 0; +} + +extern void +onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) +{ + *to = *from; +} + +extern void +onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +{ + syntax->op = op; +} + +extern void +onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +{ + syntax->op2 = op2; +} - RegDefaultSyntax = syntax; +extern void +onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +{ + syntax->behavior = behavior; +} + +extern void +onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) +{ + syntax->options = options; +} +#endif + +OnigMetaCharTableType OnigMetaCharTable = { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )0 /* anychar '.' 
*/ + , (OnigCodePoint )0 /* anytime '*' */ + , (OnigCodePoint )0 /* zero or one time '?' */ + , (OnigCodePoint )0 /* one or more time '+' */ + , (OnigCodePoint )0 /* anychar anytime */ +}; + +#ifdef USE_VARIABLE_META_CHARS +extern int onig_set_meta_char(unsigned int what, unsigned int c) +{ + switch (what) { + case ONIG_META_CHAR_ESCAPE: + OnigMetaCharTable.esc = c; + break; + case ONIG_META_CHAR_ANYCHAR: + OnigMetaCharTable.anychar = c; + break; + case ONIG_META_CHAR_ANYTIME: + OnigMetaCharTable.anytime = c; + break; + case ONIG_META_CHAR_ZERO_OR_ONE_TIME: + OnigMetaCharTable.zero_or_one_time = c; + break; + case ONIG_META_CHAR_ONE_OR_MORE_TIME: + OnigMetaCharTable.one_or_more_time = c; + break; + case ONIG_META_CHAR_ANYCHAR_ANYTIME: + OnigMetaCharTable.anychar_anytime = c; + break; + default: + return ONIGERR_INVALID_ARGUMENT; + break; + } return 0; } +#endif /* USE_VARIABLE_META_CHARS */ + + +extern void onig_null_warn(char* s) { } + +#ifdef DEFAULT_WARN_FUNCTION +static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; +#else +static OnigWarnFunc onig_warn = onig_null_warn; +#endif + +#ifdef DEFAULT_VERB_WARN_FUNCTION +static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION; +#else +static OnigWarnFunc onig_verb_warn = onig_null_warn; #endif +extern void onig_set_warn_func(OnigWarnFunc f) +{ + onig_warn = f; +} + +extern void onig_set_verb_warn_func(OnigWarnFunc f) +{ + onig_verb_warn = f; +} + static void bbuf_free(BBuf* bbuf) { @@ -146,7 +262,7 @@ bbuf_clone(BBuf** rto, BBuf* from) BBuf *to; *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_VAL(to, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY); r = BBUF_INIT(to, from->alloc); if (r != 0) return r; to->used = from->used; @@ -154,15 +270,13 @@ bbuf_clone(BBuf** rto, BBuf* from) return 0; } -#define WC2MB_MAX_BUFLEN 7 #define ONOFF(v,f,negative) (negative) ? 
((v) &= ~(f)) : ((v) |= (f)) #define SET_ALL_MULTI_BYTE_RANGE(pbuf) \ - add_wc_range_to_buf(pbuf, (WCINT )0x80, ~((WCINT )0),\ - (UChar )0x80, (UChar )0xff); + add_code_range_to_buf(pbuf, (OnigCodePoint )0x80, ~((OnigCodePoint )0)) #define ADD_ALL_MULTI_BYTE_RANGE(code, mbuf) do {\ - if (! IS_SINGLEBYTE_CODE(code)) {\ + if (! ONIGENC_IS_SINGLEBYTE(code)) {\ r = SET_ALL_MULTI_BYTE_RANGE(&(mbuf));\ if (r) return r;\ }\ @@ -188,6 +302,7 @@ bitset_set_range(BitSetRef bs, int from, int to) } } +#if 0 static void bitset_set_all(BitSetRef bs) { @@ -196,6 +311,7 @@ bitset_set_all(BitSetRef bs) bs[i] = ~((Bits )0); } } +#endif static void bitset_invert(BitSetRef bs) @@ -242,8 +358,8 @@ bitset_copy(BitSetRef dest, BitSetRef bs) } } -static int -k_strncmp(UChar* s1, UChar* s2, int n) +extern int +onig_strncmp(UChar* s1, UChar* s2, int n) { int x; @@ -265,7 +381,7 @@ k_strcpy(UChar* dest, UChar* src, UChar* end) } extern UChar* -regex_strdup(UChar* s, UChar* end) +onig_strdup(UChar* s, UChar* end) { int len = end - s; @@ -288,174 +404,10 @@ regex_strdup(UChar* s, UChar* end) #define PPEEK (p < end ? *p : PEND_VALUE) #define PEND (p < end ? 
0 : 1) -#ifdef REG_RUBY_M17N - -static int -wc2mb_buf(WCINT wc, UChar **bufs, UChar **bufe, RegCharEncoding enc) -{ - int c, len; - - c = m17n_firstbyte(enc, wc); - len = mblen(enc, c); - if (len > (*bufe - *bufs)) { - *bufs = xmalloc(len); - CHECK_NULL_RETURN_VAL(*bufs, REGERR_MEMORY); - } - m17n_mbcput(enc, wc, *bufs); - *bufe = *bufs + len; - return 0; -} - -#else /* REG_RUBY_M17N */ - -extern int -regex_wc2mb_first(RegCharEncoding code, WCINT wc) -{ - if (code == REGCODE_ASCII) { - return (wc & 0xff); - } - else if (code == REGCODE_UTF8) { - if ((wc & 0xffffff80) == 0) - return wc; - else { - if ((wc & 0xfffff800) == 0) - return ((wc>>6)& 0x1f) | 0xc0; - else if ((wc & 0xffff0000) == 0) - return ((wc>>12) & 0x0f) | 0xe0; - else if ((wc & 0xffe00000) == 0) - return ((wc>>18) & 0x07) | 0xf0; - else if ((wc & 0xfc000000) == 0) - return ((wc>>24) & 0x03) | 0xf8; - else if ((wc & 0x80000000) == 0) - return ((wc>>30) & 0x01) | 0xfc; - else { - return REGERR_TOO_BIG_WIDE_CHAR_VALUE; - } - } - } - else { - int first; - - if ((wc & 0xff0000) != 0) { - first = (wc >> 16) & 0xff; - if (mblen(code, first) != 3) - return REGERR_INVALID_WIDE_CHAR_VALUE; - } - else if ((wc & 0xff00) != 0) { - first = (wc >> 8) & 0xff; - if (mblen(code, first) != 2) - return REGERR_INVALID_WIDE_CHAR_VALUE; - } - else { - if (mblen(code, wc) != 1) - return REGERR_INVALID_WIDE_CHAR_VALUE; - return wc; - } - return first; - } -} - -static int -wc2mb(WCINT wc, UChar buf[], RegCharEncoding code) -{ -#define UTF8_TRAILS(wc, shift) ((((wc) >> (shift)) & 0x3f) | 0x80) -#define UTF8_TRAIL0(wc) (((wc) & 0x3f) | 0x80) - - UChar *p = buf; - - if (code == REGCODE_UTF8) { - if ((wc & 0xffffff80) == 0) - *p++ = wc; - else { - if ((wc & 0xfffff800) == 0) { - *p++ = ((wc>>6)& 0x1f) | 0xc0; - } - else if ((wc & 0xffff0000) == 0) { - *p++ = ((wc>>12) & 0x0f) | 0xe0; - *p++ = UTF8_TRAILS(wc, 6); - } - else if ((wc & 0xffe00000) == 0) { - *p++ = ((wc>>18) & 0x07) | 0xf0; - *p++ = UTF8_TRAILS(wc, 12); - *p++ = 
UTF8_TRAILS(wc, 6); - } - else if ((wc & 0xfc000000) == 0) { - *p++ = ((wc>>24) & 0x03) | 0xf8; - *p++ = UTF8_TRAILS(wc, 18); - *p++ = UTF8_TRAILS(wc, 12); - *p++ = UTF8_TRAILS(wc, 6); - } - else if ((wc & 0x80000000) == 0) { - *p++ = ((wc>>30) & 0x01) | 0xfc; - *p++ = UTF8_TRAILS(wc, 24); - *p++ = UTF8_TRAILS(wc, 18); - *p++ = UTF8_TRAILS(wc, 12); - *p++ = UTF8_TRAILS(wc, 6); - } - else { - return REGERR_TOO_BIG_WIDE_CHAR_VALUE; - } - *p++ = UTF8_TRAIL0(wc); - } - } - else { - if ((wc & 0xff0000) != 0) *p++ = ((wc >> 16) & 0xff); - if ((wc & 0xff00) != 0) *p++ = ((wc >> 8) & 0xff); - *p++ = (wc & 0xff); - - if (mblen(code, buf[0]) != (p - buf)) - return REGERR_INVALID_WIDE_CHAR_VALUE; - } - - return p - buf; -} - -static int -wc2mb_buf(WCINT wc, UChar **bufs, UChar **bufe, RegCharEncoding code) -{ - int r; - r = wc2mb(wc, *bufs, code); - if (r < 0) return r; - - *bufe = (*bufs) + r; - return 0; -} -#endif /* not REG_RUBY_M17N */ - -/* used as function pointer value */ -static int -is_code_ascii(RegCharEncoding code, UChar c) -{ - return (c < 128 ? 
1 : 0); -} - -static int -is_code_graph(RegCharEncoding code, UChar c) { return IS_CODE_GRAPH(code, c); } -static int -is_code_print(RegCharEncoding code, UChar c) { return IS_CODE_PRINT(code, c); } -static int -is_code_alnum(RegCharEncoding code, UChar c) { return IS_CODE_ALNUM(code, c); } -static int -is_code_alpha(RegCharEncoding code, UChar c) { return IS_CODE_ALPHA(code, c); } -static int -is_code_lower(RegCharEncoding code, UChar c) { return IS_CODE_LOWER(code, c); } -static int -is_code_upper(RegCharEncoding code, UChar c) { return IS_CODE_UPPER(code, c); } -static int -is_code_cntrl(RegCharEncoding code, UChar c) { return IS_CODE_CNTRL(code, c); } -static int -is_code_punct(RegCharEncoding code, UChar c) { return IS_CODE_PUNCT(code, c); } -static int -is_code_space(RegCharEncoding code, UChar c) { return IS_CODE_SPACE(code, c); } -static int -is_code_blank(RegCharEncoding code, UChar c) { return IS_CODE_BLANK(code, c); } -static int -is_code_digit(RegCharEncoding code, UChar c) { return IS_CODE_DIGIT(code, c); } -static int -is_code_xdigit(RegCharEncoding code, UChar c) { return IS_CODE_XDIGIT(code, c); } static UChar* -k_strcat_capa(UChar* dest, UChar* dest_end, UChar* src, UChar* src_end, int capa) +k_strcat_capa(UChar* dest, UChar* dest_end, UChar* src, UChar* src_end, + int capa) { UChar* r; @@ -483,7 +435,7 @@ strcat_capa_from_static(UChar* dest, UChar* dest_end, return r; } -#ifdef USE_NAMED_SUBEXP +#ifdef USE_NAMED_GROUP #define INIT_NAME_BACKREFS_ALLOC_NUM 8 @@ -506,7 +458,7 @@ typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ #define NAMEBUF_SIZE 24 #define NAMEBUF_SIZE_1 25 -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG static int i_print_name_entry(UChar* key, NameEntry* e, void* arg) { @@ -529,7 +481,7 @@ i_print_name_entry(UChar* key, NameEntry* e, void* arg) } extern int -regex_print_names(FILE* fp, regex_t* reg) +onig_print_names(FILE* fp, regex_t* reg) { NameTable* t = (NameTable* )reg->name_table; @@ -562,7 +514,7 @@ 
names_clear(regex_t* reg) } extern int -regex_names_free(regex_t* reg) +onig_names_free(regex_t* reg) { int r; NameTable* t; @@ -599,7 +551,7 @@ name_find(regex_t* reg, UChar* name, UChar* name_end) key = namebuf; } else { - key = regex_strdup(name, name_end); + key = onig_strdup(name, name_end); if (IS_NULL(key)) return (NameEntry* )NULL; } } @@ -611,7 +563,8 @@ name_find(regex_t* reg, UChar* name, UChar* name_end) } typedef struct { - int (*func)(UChar*,int,int*,void*); + int (*func)(UChar*,UChar*,int,int*,regex_t*,void*); + regex_t* reg; void* arg; int ret; } INamesArg; @@ -619,8 +572,9 @@ typedef struct { static int i_names(UChar* key, NameEntry* e, INamesArg* arg) { - int r = (*(arg->func))(e->name, e->back_num, - (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), arg->arg); + int r = (*(arg->func))(e->name, e->name + strlen(e->name), e->back_num, + (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), + arg->reg, arg->arg); if (r != 0) { arg->ret = r; return ST_STOP; @@ -629,7 +583,9 @@ i_names(UChar* key, NameEntry* e, INamesArg* arg) } extern int -regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg) +onig_foreach_name(regex_t* reg, + int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), + void* arg) { INamesArg narg; NameTable* t = (NameTable* )reg->name_table; @@ -637,12 +593,24 @@ regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg) narg.ret = 0; if (IS_NOT_NULL(t)) { narg.func = func; + narg.reg = reg; narg.arg = arg; st_foreach(t, i_names, (HashDataType )&narg); } return narg.ret; } +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num_entries; + else + return 0; +} + #else /* USE_ST_HASH_TABLE */ #define INIT_NAMES_ALLOC_NUM 8 @@ -654,9 +622,9 @@ typedef struct { } NameTable; -#ifdef REG_DEBUG +#ifdef ONIG_DEBUG extern int -regex_print_names(FILE* fp, regex_t* reg) +onig_print_names(FILE* fp, regex_t* reg) { int i, j; 
NameEntry* e; @@ -707,13 +675,17 @@ names_clear(regex_t* reg) e->back_refs = (int* )NULL; } } + if (IS_NOT_NULL(t->e)) { + xfree(t->e); + t->e = NULL; + } t->num = 0; } return 0; } extern int -regex_names_free(regex_t* reg) +onig_names_free(regex_t* reg) { int r; NameTable* t; @@ -738,7 +710,7 @@ name_find(regex_t* reg, UChar* name, UChar* name_end) len = name_end - name; for (i = 0; i < t->num; i++) { e = &(t->e[i]); - if (len == e->name_len && k_strncmp(name, e->name, len) == 0) + if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) return e; } } @@ -746,7 +718,9 @@ name_find(regex_t* reg, UChar* name, UChar* name_end) } extern int -regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg) +onig_foreach_name(regex_t* reg, + int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), + void* arg) { int i, r; NameEntry* e; @@ -755,25 +729,37 @@ regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg) if (IS_NOT_NULL(t)) { for (i = 0; i < t->num; i++) { e = &(t->e[i]); - r = (*func)(e->name, e->back_num, - (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), arg); + r = (*func)(e->name, e->name + e->name_len, e->back_num, + (e->back_num > 1 ? 
e->back_refs : &(e->back_ref1)), + reg, arg); if (r != 0) return r; } } return 0; } +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num; + else + return 0; +} + #endif /* else USE_ST_HASH_TABLE */ static int -name_add(regex_t* reg, UChar* name, UChar* name_end, int backref) +name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) { int alloc; NameEntry* e; NameTable* t = (NameTable* )reg->name_table; if (name_end - name <= 0) - return REGERR_INVALID_SUBEXP_NAME; + return ONIGERR_EMPTY_GROUP_NAME; e = name_find(reg, name, name_end); if (IS_NULL(e)) { @@ -782,10 +768,10 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref) reg->name_table = t = st_init_strtable(); } e = (NameEntry* )xmalloc(sizeof(NameEntry)); - CHECK_NULL_RETURN_VAL(e, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY); - e->name = regex_strdup(name, name_end); - if (IS_NULL(e->name)) return REGERR_MEMORY; + e->name = onig_strdup(name, name_end); + if (IS_NULL(e->name)) return ONIGERR_MEMORY; st_insert(t, (HashDataType )e->name, (HashDataType )e); e->name_len = name_end - name; @@ -798,7 +784,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref) if (IS_NULL(t)) { alloc = INIT_NAMES_ALLOC_NUM; t = (NameTable* )xmalloc(sizeof(NameTable)); - CHECK_NULL_RETURN_VAL(t, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY); t->e = NULL; t->alloc = 0; t->num = 0; @@ -806,7 +792,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref) t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); if (IS_NULL(t->e)) { xfree(t); - return REGERR_MEMORY; + return ONIGERR_MEMORY; } t->alloc = alloc; reg->name_table = t; @@ -817,7 +803,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref) alloc = t->alloc * 2; t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); - CHECK_NULL_RETURN_VAL(t->e, REGERR_MEMORY); + 
CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY); t->alloc = alloc; clear: @@ -831,44 +817,53 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref) } e = &(t->e[t->num]); t->num++; - e->name = regex_strdup(name, name_end); + e->name = onig_strdup(name, name_end); e->name_len = name_end - name; #endif } + if (e->back_num >= 1 && + ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { + onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, + name, name_end); + return ONIGERR_MULTIPLEX_DEFINED_NAME; + } + e->back_num++; if (e->back_num == 1) { e->back_ref1 = backref; } - else if (e->back_num == 2) { - alloc = INIT_NAME_BACKREFS_ALLOC_NUM; - e->back_refs = (int* )xmalloc(sizeof(int) * alloc); - CHECK_NULL_RETURN_VAL(e->back_refs, REGERR_MEMORY); - e->back_alloc = alloc; - e->back_refs[0] = e->back_ref1; - e->back_refs[1] = backref; - } else { - if (e->back_num > e->back_alloc) { - alloc = e->back_alloc * 2; - e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); - CHECK_NULL_RETURN_VAL(e->back_refs, REGERR_MEMORY); + if (e->back_num == 2) { + alloc = INIT_NAME_BACKREFS_ALLOC_NUM; + e->back_refs = (int* )xmalloc(sizeof(int) * alloc); + CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); e->back_alloc = alloc; + e->back_refs[0] = e->back_ref1; + e->back_refs[1] = backref; + } + else { + if (e->back_num > e->back_alloc) { + alloc = e->back_alloc * 2; + e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); + CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); + e->back_alloc = alloc; + } + e->back_refs[e->back_num - 1] = backref; } - e->back_refs[e->back_num - 1] = backref; } return 0; } extern int -regex_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end, +onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end, int** nums) { NameEntry* e; e = name_find(reg, name, name_end); - if (IS_NULL(e)) return REGERR_UNDEFINED_NAME_REFERENCE; + if (IS_NULL(e)) return 
ONIGERR_UNDEFINED_NAME_REFERENCE; switch (e->back_num) { case 0: @@ -883,21 +878,60 @@ regex_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end, return e->back_num; } -#else +extern int +onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end, + OnigRegion *region) +{ + int i, n, *nums; + + n = onig_name_to_group_numbers(reg, name, name_end, &nums); + if (n < 0) + return n; + else if (n == 0) + return ONIGERR_PARSER_BUG; + else if (n == 1) + return nums[0]; + else { + if (IS_NOT_NULL(region)) { + for (i = n - 1; i >= 0; i--) { + if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) + return nums[i]; + } + } + return nums[n - 1]; + } +} + +#else /* USE_NAMED_GROUP */ extern int -regex_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end, +onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end, int** nums) { - return REG_NO_SUPPORT_CONFIG; + return ONIG_NO_SUPPORT_CONFIG; } extern int -regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg) +onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end, + OnigRegion* region) { - return REG_NO_SUPPORT_CONFIG; + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), + void* arg) +{ + return ONIG_NO_SUPPORT_CONFIG; } -#endif + +extern int +onig_number_of_names(regex_t* reg) +{ + return 0; +} +#endif /* else USE_NAMED_GROUP */ #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 @@ -907,12 +941,17 @@ scan_env_clear(ScanEnv* env) { int i; - BIT_STATUS_CLEAR(env->backtrack_mem); + BIT_STATUS_CLEAR(env->capture_history); + BIT_STATUS_CLEAR(env->bt_mem_start); + BIT_STATUS_CLEAR(env->bt_mem_end); BIT_STATUS_CLEAR(env->backrefed_mem); env->error = (UChar* )NULL; env->error_end = (UChar* )NULL; env->num_call = 0; env->num_mem = 0; +#ifdef USE_NAMED_GROUP + env->num_named = 0; +#endif env->mem_alloc = 0; env->mem_nodes_dynamic = (Node** )NULL; @@ -939,7 +978,7 @@ 
scan_env_add_mem_entry(ScanEnv* env) alloc = env->mem_alloc * 2; p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); } - CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); for (i = env->num_mem + 1; i < alloc; i++) p[i] = NULL_NODE; @@ -959,7 +998,7 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node) if (env->num_mem >= num) SCANENV_MEM_NODES(env)[num] = node; else - return REGERR_INVALID_BACKREF; + return ONIGERR_PARSER_BUG; return 0; } @@ -973,7 +1012,7 @@ static FreeNode* FreeNodeList = (FreeNode* )NULL; #endif extern void -regex_node_free(Node* node) +onig_node_free(Node* node) { if (IS_NULL(node)) return ; @@ -986,8 +1025,8 @@ regex_node_free(Node* node) case N_LIST: case N_ALT: - regex_node_free(NCONS(node).left); - regex_node_free(NCONS(node).right); + onig_node_free(NCONS(node).left); + onig_node_free(NCONS(node).right); break; case N_CCLASS: @@ -997,12 +1036,12 @@ regex_node_free(Node* node) case N_QUALIFIER: if (NQUALIFIER(node).target) - regex_node_free(NQUALIFIER(node).target); + onig_node_free(NQUALIFIER(node).target); break; case N_EFFECT: if (NEFFECT(node).target) - regex_node_free(NEFFECT(node).target); + onig_node_free(NEFFECT(node).target); break; case N_BACKREF: @@ -1012,7 +1051,7 @@ regex_node_free(Node* node) case N_ANCHOR: if (NANCHOR(node).target) - regex_node_free(NANCHOR(node).target); + onig_node_free(NANCHOR(node).target); break; } @@ -1031,7 +1070,7 @@ regex_node_free(Node* node) #ifdef USE_RECYCLE_NODE extern int -regex_free_node_list() +onig_free_node_list() { FreeNode* n; @@ -1125,7 +1164,7 @@ node_new_alt(Node* left, Node* right) } extern Node* -regex_node_new_anchor(int type) +onig_node_new_anchor(int type) { Node* node = node_new(); CHECK_NULL_RETURN(node); @@ -1137,7 +1176,7 @@ regex_node_new_anchor(int type) } static Node* -node_new_backref(int back_num, int* backrefs, ScanEnv* env) +node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) { int i; Node* 
node = node_new(); @@ -1147,6 +1186,8 @@ node_new_backref(int back_num, int* backrefs, ScanEnv* env) NBACKREF(node).state = 0; NBACKREF(node).back_num = back_num; NBACKREF(node).back_dynamic = (int* )NULL; + if (by_name != 0) + NBACKREF(node).state |= NST_NAME_REF; for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && @@ -1163,7 +1204,7 @@ node_new_backref(int back_num, int* backrefs, ScanEnv* env) else { int* p = (int* )xmalloc(sizeof(int) * back_num); if (IS_NULL(p)) { - regex_node_free(node); + onig_node_free(node); return NULL; } NBACKREF(node).back_dynamic = p; @@ -1200,11 +1241,11 @@ node_new_qualifier(int lower, int upper, int by_number) NQUALIFIER(node).lower = lower; NQUALIFIER(node).upper = upper; NQUALIFIER(node).greedy = 1; - NQUALIFIER(node).by_number = by_number; - NQUALIFIER(node).target_may_empty = 0; - NQUALIFIER(node).head_exact = NULL_NODE; - NQUALIFIER(node).next_head_exact = NULL_NODE; - NQUALIFIER(node).is_refered = 0; + NQUALIFIER(node).by_number = by_number; + NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY; + NQUALIFIER(node).head_exact = NULL_NODE; + NQUALIFIER(node).next_head_exact = NULL_NODE; + NQUALIFIER(node).is_refered = 0; return node; } @@ -1225,13 +1266,27 @@ node_new_effect(int type) } extern Node* -regex_node_new_effect(int type) +onig_node_new_effect(int type) { return node_new_effect(type); } static Node* -node_new_option(RegOptionType option) +node_new_effect_memory(OnigOptionType option, int is_named) +{ + Node* node = node_new_effect(EFFECT_MEMORY); + CHECK_NULL_RETURN(node); + if (is_named != 0) + SET_EFFECT_STATUS(node, NST_NAMED_GROUP); + +#ifdef USE_SUBEXP_CALL + NEFFECT(node).option = option; +#endif + return node; +} + +static Node* +node_new_option(OnigOptionType option) { Node* node = node_new_effect(EFFECT_OPTION); CHECK_NULL_RETURN(node); @@ -1240,7 +1295,7 @@ node_new_option(RegOptionType option) } extern int -regex_node_str_cat(Node* node, UChar* s, UChar* end) +onig_node_str_cat(Node* 
node, UChar* s, UChar* end) { int addlen = end - s; @@ -1261,7 +1316,7 @@ regex_node_str_cat(Node* node, UChar* s, UChar* end) else p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa); - CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); NSTRING(node).s = p; NSTRING(node).capa = capa; } @@ -1281,11 +1336,11 @@ node_str_cat_char(Node* node, UChar c) UChar s[1]; s[0] = c; - return regex_node_str_cat(node, s, s + 1); + return onig_node_str_cat(node, s, s + 1); } extern void -regex_node_conv_to_str_node(Node* node, int flag) +onig_node_conv_to_str_node(Node* node, int flag) { node->type = N_STRING; @@ -1306,8 +1361,8 @@ node_new_str(UChar* s, UChar* end) NSTRING(node).flag = 0; NSTRING(node).s = NSTRING(node).buf; NSTRING(node).end = NSTRING(node).buf; - if (regex_node_str_cat(node, s, end)) { - regex_node_free(node); + if (onig_node_str_cat(node, s, end)) { + onig_node_free(node); return NULL; } return node; @@ -1346,13 +1401,13 @@ node_new_str_raw_char(UChar c) } static Node* -str_node_split_last_char(StrNode* sn, RegCharEncoding enc) +str_node_split_last_char(StrNode* sn, OnigEncoding enc) { UChar *p; Node* n = NULL_NODE; if (sn->end > sn->s) { - p = regex_get_prev_char_head(enc, sn->s, sn->end); + p = onigenc_get_prev_char_head(enc, sn->s, sn->end); if (p && p > sn->s) { /* can be splitted. */ n = node_new_str(p, sn->end); if ((sn->flag & NSTR_RAW) != 0) @@ -1364,16 +1419,16 @@ str_node_split_last_char(StrNode* sn, RegCharEncoding enc) } static int -str_node_can_be_split(StrNode* sn, RegCharEncoding enc) +str_node_can_be_split(StrNode* sn, OnigEncoding enc) { if (sn->end > sn->s) { - return ((mblen(enc, *(sn->s)) < sn->end - sn->s) ? 1 : 0); + return ((enc_len(enc, *(sn->s)) < sn->end - sn->s) ? 
1 : 0); } return 0; } extern int -regex_scan_unsigned_number(UChar** src, UChar* end, RegCharEncoding enc) +onig_scan_unsigned_number(UChar** src, UChar* end, OnigEncoding enc) { unsigned int num, val; int c; @@ -1382,7 +1437,7 @@ regex_scan_unsigned_number(UChar** src, UChar* end, RegCharEncoding enc) num = 0; while (!PEND) { PFETCH(c); - if (IS_CODE_DIGIT(enc, c)) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { val = (unsigned int )DIGITVAL(c); if ((INT_MAX_LIMIT - val) / 10UL < num) return -1; /* overflow */ @@ -1400,7 +1455,7 @@ regex_scan_unsigned_number(UChar** src, UChar* end, RegCharEncoding enc) static int scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, - RegCharEncoding enc) + OnigEncoding enc) { int c; unsigned int num, val; @@ -1409,12 +1464,12 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, num = 0; while (!PEND && maxlen-- != 0) { PFETCH(c); - if (IS_CODE_XDIGIT(enc, c)) { - val = (unsigned int )XDIGITVAL(c); + if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { + val = (unsigned int )XDIGITVAL(enc,c); if ((INT_MAX_LIMIT - val) / 16UL < num) return -1; /* overflow */ - num = (num << 4) + XDIGITVAL(c); + num = (num << 4) + XDIGITVAL(enc,c); } else { PUNFETCH; @@ -1427,7 +1482,7 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, static int scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, - RegCharEncoding enc) + OnigEncoding enc) { int c; unsigned int num, val; @@ -1436,7 +1491,7 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, num = 0; while (!PEND && maxlen-- != 0) { PFETCH(c); - if (IS_CODE_ODIGIT(enc, c)) { + if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { val = ODIGITVAL(c); if ((INT_MAX_LIMIT - val) / 8UL < num) return -1; /* overflow */ @@ -1453,38 +1508,37 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, } -#define BBUF_WRITE_WCINT(bbuf,pos,wc) \ - BBUF_WRITE(bbuf, pos, &(wc), SIZE_WCINT) +#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ + 
BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) /* data format: - [multi-byte-head-BitSet][n][from-1][to-1][from-2][to-2] ... [from-n][to-n] - (all data size is WCINT) + [n][from-1][to-1][from-2][to-2] ... [from-n][to-n] + (all data size is OnigCodePoint) */ static int -new_wc_range(BBuf** pbuf) +new_code_range(BBuf** pbuf) { -#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_WCINT * 5) +#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5) int r; - WCINT n; + OnigCodePoint n; BBuf* bbuf; bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_VAL(*pbuf, REGERR_MEMORY); - r = BBUF_INIT(*pbuf, SIZE_BITSET + INIT_MULTI_BYTE_RANGE_SIZE); + CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY); + r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); if (r) return r; n = 0; - BBUF_WRITE_WCINT(bbuf, SIZE_BITSET, n); - BITSET_CLEAR((BitSetRef )bbuf->p); + BBUF_WRITE_CODE_POINT(bbuf, 0, n); return 0; } static int -add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto) +add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) { int r, inc_n, pos; int low, high, bound, x; - WCINT n, *data; + OnigCodePoint n, *data; BBuf* bbuf; if (from > to) { @@ -1492,16 +1546,16 @@ add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto) } if (IS_NULL(*pbuf)) { - r = new_wc_range(pbuf); + r = new_code_range(pbuf); if (r) return r; bbuf = *pbuf; n = 0; } else { bbuf = *pbuf; - GET_WCINT(n, bbuf->p + SIZE_BITSET); + GET_CODE_POINT(n, bbuf->p); } - data = (WCINT* )(bbuf->p + SIZE_BITSET); + data = (OnigCodePoint* )(bbuf->p); data++; for (low = 0, bound = n; low < bound; ) { @@ -1521,8 +1575,8 @@ add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto) } inc_n = low + 1 - high; - if (n + inc_n > REG_MAX_MULTI_BYTE_RANGES_NUM) - return REGERR_TOO_MANY_MULTI_BYTE_RANGES; + if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM) + return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES; if (inc_n != 1) { if (from > data[low*2]) @@ -1532,9 
+1586,9 @@ add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto) } if (inc_n != 0 && high < n) { - int from_pos = SIZE_BITSET + SIZE_WCINT * (1 + high * 2); - int to_pos = SIZE_BITSET + SIZE_WCINT * (1 + (low + 1) * 2); - int size = (n - high) * 2 * SIZE_WCINT; + int from_pos = SIZE_CODE_POINT * (1 + high * 2); + int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2); + int size = (n - high) * 2 * SIZE_CODE_POINT; if (inc_n > 0) { BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); @@ -1544,52 +1598,34 @@ add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto) } } - pos = SIZE_BITSET + SIZE_WCINT * (1 + low * 2); - BBUF_ENSURE_SIZE(bbuf, pos + SIZE_WCINT * 2); - BBUF_WRITE_WCINT(bbuf, pos, from); - BBUF_WRITE_WCINT(bbuf, pos + SIZE_WCINT, to); + pos = SIZE_CODE_POINT * (1 + low * 2); + BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); + BBUF_WRITE_CODE_POINT(bbuf, pos, from); + BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); n += inc_n; - BBUF_WRITE_WCINT(bbuf, SIZE_BITSET, n); + BBUF_WRITE_CODE_POINT(bbuf, 0, n); - if (inc_n > 0) { - int i; - UChar tmp; - - if (cfrom > cto) { - tmp = cfrom; cfrom = cto; cto = tmp; - } - - for (i = cfrom; i <= cto; i++) { - BITSET_SET_BIT((BitSetRef)bbuf->p, i); - } - } return 0; } static int -add_wc_range(BBuf** pbuf, ScanEnv* env, WCINT from, WCINT to) +add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) { - int cfrom, cto; - if (from > to) { - if (IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) return 0; else - return REGERR_EMPTY_RANGE_IN_CHAR_CLASS; + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } - cfrom = WC2MB_FIRST(env->enc, from); - if (cfrom < 0) return cfrom; - cto = WC2MB_FIRST(env->enc, to); - if (cto < 0) return cto; - return add_wc_range_to_buf(pbuf, from, to, (UChar )cfrom, (UChar )cto); + return add_code_range_to_buf(pbuf, from, to); } static int 
-not_wc_range_buf(BBuf* bbuf, BBuf** pbuf) +not_code_range_buf(BBuf* bbuf, BBuf** pbuf) { int r, i, n; - WCINT pre, from, to, *data; + OnigCodePoint pre, from, to, *data; *pbuf = (BBuf* )NULL; if (IS_NULL(bbuf)) { @@ -1597,8 +1633,8 @@ not_wc_range_buf(BBuf* bbuf, BBuf** pbuf) return SET_ALL_MULTI_BYTE_RANGE(pbuf); } - data = (WCINT* )(bbuf->p + SIZE_BITSET); - GET_WCINT(n, data); + data = (OnigCodePoint* )(bbuf->p); + GET_CODE_POINT(n, data); data++; if (n <= 0) goto set_all; @@ -1608,14 +1644,14 @@ not_wc_range_buf(BBuf* bbuf, BBuf** pbuf) from = data[i*2]; to = data[i*2+1]; if (pre <= from - 1) { - r = add_wc_range_to_buf(pbuf, pre, from - 1, (UChar )0, (UChar )0); + r = add_code_range_to_buf(pbuf, pre, from - 1); if (r != 0) return r; } - if (to == ~((WCINT )0)) break; + if (to == ~((OnigCodePoint )0)) break; pre = to + 1; } - if (to < ~((WCINT )0)) { - r = add_wc_range_to_buf(pbuf, to + 1, ~((WCINT )0), (UChar )0, (UChar )0); + if (to < ~((OnigCodePoint )0)) { + r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); } return r; } @@ -1628,11 +1664,11 @@ not_wc_range_buf(BBuf* bbuf, BBuf** pbuf) } while (0) static int -or_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) +or_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) { int i, r; - WCINT n1, *data1; - WCINT from, to; + OnigCodePoint n1, *data1; + OnigCodePoint from, to; *pbuf = (BBuf* )NULL; if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) { @@ -1654,7 +1690,7 @@ or_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) return bbuf_clone(pbuf, bbuf2); } else { - return not_wc_range_buf(bbuf2, pbuf); + return not_code_range_buf(bbuf2, pbuf); } } } @@ -1662,32 +1698,33 @@ or_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) if (not1 != 0) SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - data1 = (WCINT* )(bbuf1->p + SIZE_BITSET); - GET_WCINT(n1, data1); + data1 = (OnigCodePoint* )(bbuf1->p); + GET_CODE_POINT(n1, data1); data1++; if 
(not2 == 0 && not1 == 0) { /* 1 OR 2 */ r = bbuf_clone(pbuf, bbuf2); } else if (not1 == 0) { /* 1 OR (not 2) */ - r = not_wc_range_buf(bbuf2, pbuf); + r = not_code_range_buf(bbuf2, pbuf); } if (r != 0) return r; for (i = 0; i < n1; i++) { from = data1[i*2]; to = data1[i*2+1]; - r = add_wc_range_to_buf(pbuf, from, to, (UChar )0, (UChar )0); + r = add_code_range_to_buf(pbuf, from, to); if (r != 0) return r; } return 0; } static int -and_wc_range1(BBuf** pbuf, WCINT from1, WCINT to1, WCINT* data, int n) +and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, + OnigCodePoint* data, int n) { int i, r; - WCINT from2, to2; + OnigCodePoint from2, to2; for (i = 0; i < n; i++) { from2 = data[i*2]; @@ -1701,7 +1738,7 @@ and_wc_range1(BBuf** pbuf, WCINT from1, WCINT to1, WCINT* data, int n) else if (from2 <= to1) { if (to2 < to1) { if (from1 <= from2 - 1) { - r = add_wc_range_to_buf(pbuf, from1, from2-1, (UChar )0, (UChar )0); + r = add_code_range_to_buf(pbuf, from1, from2-1); if (r != 0) return r; } from1 = to2 + 1; @@ -1716,18 +1753,18 @@ and_wc_range1(BBuf** pbuf, WCINT from1, WCINT to1, WCINT* data, int n) if (from1 > to1) break; } if (from1 <= to1) { - r = add_wc_range_to_buf(pbuf, from1, to1, (UChar )0, (UChar )0); + r = add_code_range_to_buf(pbuf, from1, to1); if (r != 0) return r; } return 0; } static int -and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) +and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) { int i, j, r; - WCINT n1, n2, *data1, *data2; - WCINT from, to, from1, to1, from2, to2; + OnigCodePoint n1, n2, *data1, *data2; + OnigCodePoint from, to, from1, to1, from2, to2; *pbuf = (BBuf* )NULL; if (IS_NULL(bbuf1)) { @@ -1744,10 +1781,10 @@ and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) if (not1 != 0) SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - data1 = (WCINT* )(bbuf1->p + SIZE_BITSET); - data2 = (WCINT* )(bbuf2->p + SIZE_BITSET); - GET_WCINT(n1, data1); - 
GET_WCINT(n2, data2); + data1 = (OnigCodePoint* )(bbuf1->p); + data2 = (OnigCodePoint* )(bbuf2->p); + GET_CODE_POINT(n1, data1); + GET_CODE_POINT(n2, data2); data1++; data2++; @@ -1762,7 +1799,7 @@ and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) if (to2 < from1) continue; from = MAX(from1, from2); to = MIN(to1, to2); - r = add_wc_range_to_buf(pbuf, from, to, (UChar )0, (UChar )0); + r = add_code_range_to_buf(pbuf, from, to); if (r != 0) return r; } } @@ -1771,7 +1808,7 @@ and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) for (i = 0; i < n1; i++) { from1 = data1[i*2]; to1 = data1[i*2+1]; - r = and_wc_range1(pbuf, from1, to1, data2, n2); + r = and_code_range1(pbuf, from1, to1, data2, n2); if (r != 0) return r; } } @@ -1780,7 +1817,7 @@ and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) } static int -and_cclass(CClassNode* dest, CClassNode* cc) +and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) { int r, not1, not2; BBuf *buf1, *buf2, *pbuf; @@ -1811,34 +1848,34 @@ and_cclass(CClassNode* dest, CClassNode* cc) bitset_invert(dest->bs); } - if (not1 != 0 && not2 != 0) { - r = or_wc_range_buf(buf1, 0, buf2, 0, &pbuf); - } - else { - r = and_wc_range_buf(buf1, not1, buf2, not2, &pbuf); - if (r == 0 && not1 != 0) { - BBuf *tbuf; - r = not_wc_range_buf(pbuf, &tbuf); - if (r != 0) { + if (! ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = or_code_range_buf(buf1, 0, buf2, 0, &pbuf); + } + else { + r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf); + if (r == 0 && not1 != 0) { + BBuf *tbuf; + r = not_code_range_buf(pbuf, &tbuf); + if (r != 0) { + bbuf_free(pbuf); + return r; + } bbuf_free(pbuf); - return r; + pbuf = tbuf; } - bbuf_free(pbuf); - pbuf = tbuf; } - } - if (r != 0) return r; + if (r != 0) return r; - dest->mbuf = pbuf; - bbuf_free(buf1); - if (IS_NOT_NULL(pbuf)) { - bitset_set_all((BitSetRef )pbuf->p); /* Sorry, but I'm tired. 
*/ + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; } - return r; + return 0; } static int -or_cclass(CClassNode* dest, CClassNode* cc) +or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) { int r, not1, not2; BBuf *buf1, *buf2, *pbuf; @@ -1869,36 +1906,37 @@ or_cclass(CClassNode* dest, CClassNode* cc) bitset_invert(dest->bs); } - if (not1 != 0 && not2 != 0) { - r = and_wc_range_buf(buf1, 0, buf2, 0, &pbuf); - } - else { - r = or_wc_range_buf(buf1, not1, buf2, not2, &pbuf); - if (r == 0 && not1 != 0) { - BBuf *tbuf; - r = not_wc_range_buf(pbuf, &tbuf); - if (r != 0) { + if (! ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf); + } + else { + r = or_code_range_buf(buf1, not1, buf2, not2, &pbuf); + if (r == 0 && not1 != 0) { + BBuf *tbuf; + r = not_code_range_buf(pbuf, &tbuf); + if (r != 0) { + bbuf_free(pbuf); + return r; + } bbuf_free(pbuf); - return r; + pbuf = tbuf; } - bbuf_free(pbuf); - pbuf = tbuf; } - } - if (r != 0) return r; + if (r != 0) return r; - dest->mbuf = pbuf; - bbuf_free(buf1); - if (IS_NOT_NULL(pbuf)) { - bitset_set_all((BitSetRef )pbuf->p); /* Sorry, but I'm tired. 
*/ + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; } - return r; + else + return 0; } static int conv_backslash_value(int c, ScanEnv* env) { - if (IS_SYNTAX_OP(env->syntax, REG_SYN_OP_ESC_CONTROL_CHAR)) { + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { switch (c) { case 'n': return '\n'; case 't': return '\t'; @@ -1908,7 +1946,7 @@ conv_backslash_value(int c, ScanEnv* env) case 'b': return '\010'; case 'e': return '\033'; case 'v': - if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_ESC_V_VTAB)) + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) return '\v'; break; @@ -1976,8 +2014,8 @@ popular_qualifier_num(QualifierNode* qf) return -1; } -static void -reduce_nested_qualifier(Node* pnode, Node* cnode) +extern void +onig_reduce_nested_qualifier(Node* pnode, Node* cnode) { #define NQ_ASIS 0 /* as is */ #define NQ_DEL 1 /* delete parent */ @@ -2039,7 +2077,7 @@ reduce_nested_qualifier(Node* pnode, Node* cnode) } c->target = NULL_NODE; - regex_node_free(cnode); + onig_node_free(cnode); } @@ -2047,7 +2085,7 @@ enum TokenSyms { TK_EOT = 0, /* end of token */ TK_BYTE = 1, TK_RAW_BYTE = 2, - TK_WC, + TK_CODE_POINT, TK_ANYCHAR, TK_CHAR_TYPE, TK_BACKREF, @@ -2055,11 +2093,13 @@ enum TokenSyms { TK_ANCHOR, TK_OP_REPEAT, TK_INTERVAL, + TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ TK_ALT, TK_SUBEXP_OPEN, TK_SUBEXP_CLOSE, TK_CC_OPEN, TK_QUOTE_OPEN, + TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ /* in cc */ TK_CC_CLOSE, TK_CC_RANGE, @@ -2075,7 +2115,7 @@ typedef struct { UChar* backp; union { int c; - WCINT wc; + OnigCodePoint code; int anchor; int subtype; struct { @@ -2088,71 +2128,90 @@ typedef struct { int num; int ref1; int* refs; + int by_name; } backref; struct { UChar* name; UChar* name_end; } call; + struct { + int not; + } prop; } u; -} RegToken; +} OnigToken; static int -fetch_range_qualifier(UChar** src, UChar* end, RegToken* tok, ScanEnv* env) +fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) { - int low, up, syn_allow; + 
int low, up, syn_allow, non_low = 0; int c; UChar* p = *src; - syn_allow = IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_INVALID_INTERVAL); + syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); if (PEND) { if (syn_allow) return 1; /* "....{" : OK! */ else - return REGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ } if (! syn_allow) { c = PPEEK; if (c == ')' || c == '(' || c == '|') { - return REGERR_END_PATTERN_AT_LEFT_BRACE; + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; } } - low = regex_scan_unsigned_number(&p, end, env->enc); - if (low < 0) return REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - if (low > REG_MAX_REPEAT_NUM) - return REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + low = onig_scan_unsigned_number(&p, end, env->enc); + if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (low > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - if (p == *src) goto invalid; /* can't read low */ + if (p == *src) { /* can't read low */ + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { + /* allow {,n} as {0,n} */ + low = 0; + non_low = 1; + } + else + goto invalid; + } if (PEND) goto invalid; PFETCH(c); if (c == ',') { UChar* prev = p; - up = regex_scan_unsigned_number(&p, end, env->enc); - if (up < 0) return REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - if (up > REG_MAX_REPEAT_NUM) - return REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - - if (p == prev) up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ + up = onig_scan_unsigned_number(&p, end, env->enc); + if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (up > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == prev) { + if (non_low != 0) + goto invalid; + up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ + } } else { + if (non_low != 0) + goto invalid; + PUNFETCH; up = low; /* {n} : exact n times */ } if (PEND) goto invalid; PFETCH(c); - if 
(IS_SYNTAX_OP(env->syntax, REG_SYN_OP_ESC_INTERVAL)) { - if (c != '\\') goto invalid; + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { + if (c != MC_ESC) goto invalid; PFETCH(c); } if (c != '}') goto invalid; if (!IS_REPEAT_INFINITE(up) && low > up) { - return REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; + return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; } tok->type = TK_INTERVAL; @@ -2165,7 +2224,7 @@ fetch_range_qualifier(UChar** src, UChar* end, RegToken* tok, ScanEnv* env) if (syn_allow) return 1; /* OK */ else - return REGERR_INVALID_REPEAT_RANGE_PATTERN; + return ONIGERR_INVALID_REPEAT_RANGE_PATTERN; } /* \M-, \C-, \c, or \... */ @@ -2175,18 +2234,18 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) int c; UChar* p = *src; - if (PEND) return REGERR_END_PATTERN_AT_BACKSLASH; + if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH; PFETCH(c); switch (c) { case 'M': - if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_ESC_M_BAR_META)) { - if (PEND) return REGERR_END_PATTERN_AT_META; + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { + if (PEND) return ONIGERR_END_PATTERN_AT_META; PFETCH(c); - if (c != '-') return REGERR_META_CODE_SYNTAX; - if (PEND) return REGERR_END_PATTERN_AT_META; + if (c != '-') return ONIGERR_META_CODE_SYNTAX; + if (PEND) return ONIGERR_END_PATTERN_AT_META; PFETCH(c); - if (c == '\\') { + if (c == MC_ESC) { c = fetch_escaped_value(&p, end, env); if (c < 0) return c; } @@ -2197,21 +2256,21 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) break; case 'C': - if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { - if (PEND) return REGERR_END_PATTERN_AT_CONTROL; + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; PFETCH(c); - if (c != '-') return REGERR_CONTROL_CODE_SYNTAX; + if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; goto control; } else goto backslash; case 'c': - if 
(IS_SYNTAX_OP(env->syntax, REG_SYN_OP_ESC_C_CONTROL)) { + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { control: - if (PEND) return REGERR_END_PATTERN_AT_CONTROL; + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; PFETCH(c); - if (c == '\\') { + if (c == MC_ESC) { c = fetch_escaped_value(&p, end, env); if (c < 0) return c; } @@ -2235,80 +2294,175 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) return c; } -static int fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env); +static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); -#ifdef USE_NAMED_SUBEXP +#ifdef USE_NAMED_GROUP +/* + def: 0 -> define name (don't allow number name) + 1 -> reference name (allow number name) +*/ static int -fetch_name(UChar** src, UChar* end, UChar** name_end, ScanEnv* env) +fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) { - int len; + int r, len, is_num; int c = 0; + UChar *name_end; UChar *p = *src; - while (!PEND) { - *name_end = p; + name_end = end; + r = 0; + is_num = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { PFETCH(c); - if (c == '>') break; - else if (c == ')' || c == '\\' || c == '\0') - return REGERR_INVALID_SUBEXP_NAME; + if (c == '>') + return ONIGERR_EMPTY_GROUP_NAME; - len = mblen(env->enc, c); - while (!PEND && len-- > 1) { + if (ONIGENC_IS_CODE_DIGIT(env->enc, c)) { + if (ref == 1) + is_num = 1; + else { + r = ONIGERR_INVALID_GROUP_NAME; + } + } + len = enc_len(env->enc, c); + while (!PEND && len-- > 1) PFETCH(c); + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == '>' || c == ')') break; + + len = enc_len(env->enc, c); + if (is_num == 1) { + if (! 
ONIGENC_IS_CODE_DIGIT(env->enc, c)) { + if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) && c != '_') + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else + r = ONIGERR_INVALID_GROUP_NAME; + } + } + else { + if (len == 1) { + if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) && + !ONIGENC_IS_CODE_DIGIT(env->enc, c) && + c != '_') { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } } + + while (!PEND && len-- > 1) + PFETCH(c); + } + if (c != '>') { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + else { + c = **src; + if (ONIGENC_IS_CODE_UPPER(env->enc, c)) + r = ONIGERR_INVALID_GROUP_NAME; } - if (c != '>') return REGERR_INVALID_SUBEXP_NAME; - *src = p; - return 0; -} -#endif -static void -CC_ESC_WARN(ScanEnv* env, UChar *c) -{ -#ifdef WARNING - if (IS_SYNTAX_BV(env->syntax, REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED) && - IS_SYNTAX_BV(env->syntax, REG_SYN_ESCAPE_IN_CC)) { - char buf[WARN_BUFSIZE]; - regex_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - "character class has '%s' without escape", c); - WARNING(buf); + if (r == 0) { + *rname_end = name_end; + *src = p; + return 0; + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; } -#endif } - -static void +#else +static int +fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) +{ + int r, len; + int c = 0; + UChar *name_end; + UChar *p = *src; + + r = 0; + while (!PEND) { + name_end = p; + PFETCH(c); + if (enc_len(env->enc, c) > 1) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + + if (c == '>' || c == ')') break; + if (! 
ONIGENC_IS_CODE_DIGIT(env->enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + if (c != '>') { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return 0; + } + else { + err: + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif + +static void +CC_ESC_WARN(ScanEnv* env, UChar *c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { + char buf[WARN_BUFSIZE]; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + "character class has '%s' without escape", c); + (*onig_warn)(buf); + } +} + +static void CCEND_ESC_WARN(ScanEnv* env, UChar* c) { -#ifdef WARNING - if (IS_SYNTAX_BV((env)->syntax, REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED)) { + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { char buf[WARN_BUFSIZE]; - regex_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, (env)->pattern, (env)->pattern_end, "regular expression has '%s' without escape", c); - WARNING(buf); + (*onig_warn)(buf); } -#endif } static UChar* -find_str_position(WCINT s[], int n, UChar* from, UChar* to, UChar **next, - RegCharEncoding enc) +find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, + UChar **next, OnigEncoding enc) { int i; - WCINT x; + OnigCodePoint x; UChar *q; UChar *p = from; while (p < to) { - x = mb2wc(p, to, enc); - q = p + mblen(enc, *p); + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enc_len(enc, *p); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { - x = mb2wc(q, to, enc); + x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; - q += mblen(enc, *q); + q += enc_len(enc, *q); } if (i >= n) { if (IS_NOT_NULL(next)) @@ -2322,11 +2476,11 @@ find_str_position(WCINT s[], int n, 
UChar* from, UChar* to, UChar **next, } static int -str_exist_check_with_esc(WCINT s[], int n, UChar* from, UChar* to, - WCINT bad, RegCharEncoding enc) +str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, + OnigCodePoint bad, OnigEncoding enc) { int i, in_esc; - WCINT x; + OnigCodePoint x; UChar *q; UChar *p = from; @@ -2334,24 +2488,24 @@ str_exist_check_with_esc(WCINT s[], int n, UChar* from, UChar* to, while (p < to) { if (in_esc) { in_esc = 0; - p += mblen(enc, *p); + p += enc_len(enc, *p); } else { - x = mb2wc(p, to, enc); - q = p + mblen(enc, *p); + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enc_len(enc, *p); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { - x = mb2wc(q, to, enc); + x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; - q += mblen(enc, *q); + q += enc_len(enc, *q); } if (i >= n) return 1; - p += mblen(enc, *p); + p += enc_len(enc, *p); } else { - x = mb2wc(p, to, enc); + x = ONIGENC_MBC_TO_CODE(enc, p, to); if (x == bad) return 0; - else if (x == '\\') in_esc = 1; + else if (x == MC_ESC) in_esc = 1; p = q; } } @@ -2360,10 +2514,10 @@ str_exist_check_with_esc(WCINT s[], int n, UChar* from, UChar* to, } static int -fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) +fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) { int c, num; - RegSyntaxType* syn = env->syntax; + OnigSyntaxType* syn = env->syntax; UChar* prev; UChar* p = *src; @@ -2382,11 +2536,11 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) else if (c == '-') { tok->type = TK_CC_RANGE; } - else if (c == '\\') { - if (! IS_SYNTAX_BV(syn, REG_SYN_ESCAPE_IN_CC)) + else if (c == MC_ESC) { + if (! 
IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) goto end; - if (PEND) return REGERR_END_PATTERN_AT_BACKSLASH; + if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH; PFETCH(c); tok->escaped = 1; @@ -2417,31 +2571,41 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.subtype = CTYPE_NOT_WHITE_SPACE; break; + case 'p': + case 'P': + if (PPEEK == '{' && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + } + break; + case 'x': if (PEND) break; prev = p; - if (PPEEK == '{' && IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_X_BRACE_HEX8)) { + if (PPEEK == '{' && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { PINC; num = scan_unsigned_hexadecimal_number(&p, end, 8, env->enc); - if (num < 0) return REGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND && IS_XDIGIT(*p) && p - prev >= 9) - return REGERR_TOO_LONG_WIDE_CHAR_VALUE; + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND && ONIGENC_IS_CODE_XDIGIT(env->enc, *p) && p - prev >= 9) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; if (p > prev + 1 && !PEND && PPEEK == '}') { PINC; - tok->type = TK_WC; - tok->base = 16; - tok->u.wc = (WCINT )num; + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; } else { /* can't read nothing or invalid format */ p = prev; } } - else if (IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_X_HEX2)) { + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { num = scan_unsigned_hexadecimal_number(&p, end, 2, env->enc); - if (num < 0) return REGERR_TOO_BIG_NUMBER; + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } @@ -2455,9 +2619,9 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) if (PEND) break; prev = p; - if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_ESC_U_HEX4)) { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { num = scan_unsigned_hexadecimal_number(&p, end, 4, env->enc); - if (num < 0) return REGERR_TOO_BIG_NUMBER; + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -2469,11 +2633,11 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - if (IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_OCTAL3)) { + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { PUNFETCH; prev = p; num = scan_unsigned_octal_number(&p, end, 3, env->enc); - if (num < 0) return REGERR_TOO_BIG_NUMBER; + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -2495,11 +2659,12 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) } } else if (c == '[') { - if (IS_SYNTAX_OP(syn, REG_SYN_OP_POSIX_BRACKET) && PPEEK == ':') { - WCINT send[] = { (WCINT )':', (WCINT )']' }; + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && PPEEK == ':') { + OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; tok->backp = p; /* point at '[' is readed */ PINC; - if (str_exist_check_with_esc(send, 2, p, end, (WCINT )']', env->enc)) { + if (str_exist_check_with_esc(send, 2, p, end, (OnigCodePoint )']', + env->enc)) { tok->type = TK_POSIX_BRACKET_OPEN; } else { @@ -2509,7 +2674,7 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) } else { cc_in_cc: - if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_CCLASS_SET)) { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { tok->type = TK_CC_CC_OPEN; } else { @@ -2518,7 +2683,8 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* 
env) } } else if (c == '&') { - if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_CCLASS_SET) && !PEND && PPEEK == '&') { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && + !PEND && PPEEK == '&') { PINC; tok->type = TK_CC_AND; } @@ -2530,10 +2696,10 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) } static int -fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) +fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) { int r, c, num; - RegSyntaxType* syn = env->syntax; + OnigSyntaxType* syn = env->syntax; UChar* prev; UChar* p = *src; @@ -2546,15 +2712,15 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_BYTE; tok->base = 0; PFETCH(c); - if (c == '\\') { - if (PEND) return REGERR_END_PATTERN_AT_BACKSLASH; + if (c == MC_ESC) { + if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH; PFETCH(c); tok->u.c = c; tok->escaped = 1; switch (c) { case '*': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_0INF)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = REPEAT_INFINITE; @@ -2562,7 +2728,7 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '+': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_1INF)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 1; tok->u.repeat.upper = REPEAT_INFINITE; @@ -2570,20 +2736,21 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '?': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_01)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = 1; greedy_check: - if (!PEND && PPEEK == '?' && IS_SYNTAX_OP(syn, REG_SYN_OP_NON_GREEDY)) { + if (!PEND && PPEEK == '?' 
&& + IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { PFETCH(c); tok->u.repeat.greedy = 0; tok->u.repeat.possessive = 0; } else if (!PEND && PPEEK == '+' && - ((IS_SYNTAX_OP2(syn, REG_SYN_OP2_POSSESSIVE_REPEAT) && + ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && tok->type != TK_INTERVAL) || - (IS_SYNTAX_OP2(syn, REG_SYN_OP2_POSSESSIVE_INTERVAL) && + (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && tok->type == TK_INTERVAL))) { PFETCH(c); tok->u.repeat.greedy = 1; @@ -2596,7 +2763,7 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '{': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_INTERVAL)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; tok->backp = p; r = fetch_range_qualifier(&p, end, tok, env); if (r < 0) return r; /* error */ @@ -2608,115 +2775,115 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '|': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_ALT)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; tok->type = TK_ALT; break; case '(': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_SUBEXP)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; tok->type = TK_SUBEXP_OPEN; break; case ')': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_SUBEXP)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; tok->type = TK_SUBEXP_CLOSE; break; case 'w': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_WORD; break; case 'W': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_WORD; break; case 'b': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD_BOUND)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; tok->u.anchor = ANCHOR_WORD_BOUND; break; case 'B': - if (! 
IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD_BOUND)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; tok->u.anchor = ANCHOR_NOT_WORD_BOUND; break; #ifdef USE_WORD_BEGIN_END case '<': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD_BEGIN_END)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; tok->u.anchor = ANCHOR_WORD_BEGIN; break; case '>': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD_BEGIN_END)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; tok->u.anchor = ANCHOR_WORD_END; break; #endif case 's': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WHITE_SPACE)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_WHITE_SPACE; break; case 'S': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WHITE_SPACE)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_WHITE_SPACE; break; case 'd': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_DIGIT)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_DIGIT; break; case 'D': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_DIGIT)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_DIGIT; break; case 'A': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_BUF_ANCHOR)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; begin_buf: tok->type = TK_ANCHOR; tok->u.subtype = ANCHOR_BEGIN_BUF; break; case 'Z': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_BUF_ANCHOR)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = ANCHOR_SEMI_END_BUF; break; case 'z': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_BUF_ANCHOR)) break; + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; end_buf: tok->type = TK_ANCHOR; tok->u.subtype = ANCHOR_END_BUF; break; case 'G': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_BUF_ANCHOR)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = ANCHOR_BEGIN_POSITION; break; case '`': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_GNU_BUF_ANCHOR)) break; + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; goto begin_buf; break; case '\'': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_GNU_BUF_ANCHOR)) break; + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; goto end_buf; break; @@ -2724,26 +2891,26 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) if (PEND) break; prev = p; - if (PPEEK == '{' && IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_X_BRACE_HEX8)) { + if (PPEEK == '{' && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { PINC; num = scan_unsigned_hexadecimal_number(&p, end, 8, env->enc); - if (num < 0) return REGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND && IS_XDIGIT(*p) && p - prev >= 9) - return REGERR_TOO_LONG_WIDE_CHAR_VALUE; + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND && ONIGENC_IS_CODE_XDIGIT(env->enc, *p) && p - prev >= 9) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; if (p > prev + 1 && !PEND && PPEEK == '}') { PINC; - tok->type = TK_WC; - tok->u.wc = (WCINT )num; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; } else { /* can't read nothing or invalid format */ p = prev; } } - else if (IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_X_HEX2)) { + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { num = scan_unsigned_hexadecimal_number(&p, end, 2, env->enc); - if (num < 0) return REGERR_TOO_BIG_NUMBER; + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } @@ -2757,9 +2924,9 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) if (PEND) break; prev = p; - if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_ESC_U_HEX4)) { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { num = scan_unsigned_hexadecimal_number(&p, end, 4, env->enc); - if (num < 0) return REGERR_TOO_BIG_NUMBER; + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -2773,20 +2940,21 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) case '5': case '6': case '7': case '8': case '9': PUNFETCH; prev = p; - num = regex_scan_unsigned_number(&p, end, env->enc); - if (num < 0) return REGERR_TOO_BIG_NUMBER; - if (num > REG_MAX_BACKREF_NUM) return REGERR_TOO_BIG_BACKREF_NUMBER; + num = onig_scan_unsigned_number(&p, end, env->enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (num > ONIG_MAX_BACKREF_NUM) return ONIGERR_TOO_BIG_BACKREF_NUMBER; - if (IS_SYNTAX_OP(syn, REG_SYN_OP_BACK_REF) && + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ - if (IS_SYNTAX_BV(syn, REG_SYN_STRICT_CHECK_BACKREF)) { + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) - return REGERR_INVALID_BACKREF; + return ONIGERR_INVALID_BACKREF; } tok->type = TK_BACKREF; - tok->u.backref.num = 1; - tok->u.backref.ref1 = num; + tok->u.backref.num = 1; + tok->u.backref.ref1 = num; + tok->u.backref.by_name = 0; break; } else if (c == '8' || c == '9') { @@ -2798,10 +2966,10 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) p = prev; /* fall through */ case '0': - if (IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_OCTAL3)) { + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { prev = p; num = scan_unsigned_octal_number(&p, end, (c == '0' ? 
2:3), env->enc); - if (num < 0) return REGERR_TOO_BIG_NUMBER; + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -2814,33 +2982,34 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) } break; -#ifdef USE_NAMED_SUBEXP +#ifdef USE_NAMED_GROUP case 'k': - if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_NAMED_SUBEXP)) { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { PFETCH(c); if (c == '<') { UChar* name_end; int* backs; prev = p; - r = fetch_name(&p, end, &name_end, env); + r = fetch_name(&p, end, &name_end, env, 1); if (r < 0) return r; - num = regex_name_to_group_numbers(env->reg, prev, name_end, &backs); + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); if (num <= 0) { - regex_scan_env_set_error_string(env, - REGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); - return REGERR_UNDEFINED_NAME_REFERENCE; + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; } - if (IS_SYNTAX_BV(syn, REG_SYN_STRICT_CHECK_BACKREF)) { + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { int i; for (i = 0; i < num; i++) { if (backs[i] > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) - return REGERR_INVALID_BACKREF; + return ONIGERR_INVALID_BACKREF; } } tok->type = TK_BACKREF; + tok->u.backref.by_name = 1; if (num == 1) { tok->u.backref.num = 1; tok->u.backref.ref1 = backs[0]; @@ -2858,13 +3027,13 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) #ifdef USE_SUBEXP_CALL case 'g': - if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_SUBEXP_CALL)) { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { PFETCH(c); if (c == '<') { UChar* name_end; prev = p; - r = fetch_name(&p, end, &name_end, env); + r = fetch_name(&p, end, &name_end, env, 1); if (r < 0) return r; tok->type = TK_CALL; @@ -2878,11 +3047,21 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) 
#endif case 'Q': - if (IS_SYNTAX_OP(syn, REG_SYN_OP_QUOTE)) { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { tok->type = TK_QUOTE_OPEN; } break; + case 'p': + case 'P': + if (PPEEK == '{' && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + } + break; + default: PUNFETCH; num = fetch_escaped_value(&p, end, env); @@ -2899,14 +3078,34 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.c = c; tok->escaped = 0; +#ifdef USE_VARIABLE_META_CHARS + if ((c != ONIG_INEFFECTIVE_META_CHAR) && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { + if (c == MC_ANYCHAR) + goto any_char; + else if (c == MC_ANYTIME) + goto anytime; + else if (c == MC_ZERO_OR_ONE_TIME) + goto zero_or_one_time; + else if (c == MC_ONE_OR_MORE_TIME) + goto one_or_more_time; + else if (c == MC_ANYCHAR_ANYTIME) { + tok->type = TK_ANYCHAR_ANYTIME; + goto out; + } + } +#endif + switch (c) { case '.': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ANYCHAR)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; + any_char: tok->type = TK_ANYCHAR; break; case '*': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_0INF)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; + anytime: tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = REPEAT_INFINITE; @@ -2914,7 +3113,8 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '+': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_1INF)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; + one_or_more_time: tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 1; tok->u.repeat.upper = REPEAT_INFINITE; @@ -2922,7 +3122,8 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '?': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_01)) break; + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; + zero_or_one_time: tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = 1; @@ -2930,7 +3131,7 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '{': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_INTERVAL)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; tok->backp = p; r = fetch_range_qualifier(&p, end, tok, env); if (r < 0) return r; /* error */ @@ -2942,36 +3143,36 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '|': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ALT)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break; tok->type = TK_ALT; break; case '(': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_SUBEXP)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; tok->type = TK_SUBEXP_OPEN; break; case ')': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_SUBEXP)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; tok->type = TK_SUBEXP_CLOSE; break; case '^': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_LINE_ANCHOR)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->option) ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); break; case '$': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_LINE_ANCHOR)) break; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->option) ? ANCHOR_END_BUF : ANCHOR_END_LINE); break; case '[': - if (! IS_SYNTAX_OP(syn, REG_SYN_OP_CC)) break; + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; tok->type = TK_CC_OPEN; break; @@ -2984,7 +3185,7 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) if (IS_EXTEND(env->option)) { while (!PEND) { PFETCH(c); - if (IS_NEWLINE(c)) + if (ONIG_IS_NEWLINE(c)) break; } goto start; @@ -3002,31 +3203,182 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env) } } + out: *src = p; return tok->type; } -static void -bitset_by_pred_func(BitSetRef bs, int (*pf)(RegCharEncoding, UChar), - RegCharEncoding code, int not) +static int +add_ctype_to_cc_by_list(CClassNode* cc, int ctype, int not, + OnigEncoding enc) { - int c; + int i, j, r, nsb, nmb; + OnigCodePointRange *sbr, *mbr; + + r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &nsb, &nmb, &sbr, &mbr); + if (r != 0) return r; - if (not) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! pf(code, (UChar )c)) BITSET_SET_BIT(bs, c); + if (not == 0) { + for (i = 0; i < nsb; i++) { + for (j = sbr[i].from; j <= sbr[i].to; j++) { + BITSET_SET_BIT(cc->bs, j); + } + } + for (i = 0; i < nmb; i++) { + r = add_code_range_to_buf(&(cc->mbuf), mbr[i].from, mbr[i].to); + if (r != 0) return r; } } else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (pf(code, (UChar )c)) BITSET_SET_BIT(bs, c); + OnigCodePoint prev = 0; + for (i = 0; i < nsb; i++) { + for (j = prev; j < sbr[i].from; j++) { + BITSET_SET_BIT(cc->bs, j); + } + prev = sbr[i].to + 1; + } + if (prev < 0x7f) { + for (j = prev; j < 0x7f; j++) { + BITSET_SET_BIT(cc->bs, j); + } + } + + prev = 0x80; + for (i = 0; i < nmb; i++) { + if (prev < mbr[i].from) { + r = add_code_range_to_buf(&(cc->mbuf), prev, mbr[i].from - 1); + if (r != 0) return r; + } + prev = mbr[i].to + 1; + } + if (prev < 0x7fffffff) { + r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); + if (r != 0) return r; + } + } + + return r; +} + +static int +add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) +{ + int c, r; + OnigEncoding enc = env->enc; + + if 
(ONIGENC_CTYPE_SUPPORT_LEVEL(enc) != ONIGENC_CTYPE_SUPPORT_LEVEL_SB) { + r = add_ctype_to_cc_by_list(cc, ctype, not, env->enc); + return r; + } + + r = 0; + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + case ONIGENC_CTYPE_BLANK: + case ONIGENC_CTYPE_CNTRL: + case ONIGENC_CTYPE_DIGIT: + case ONIGENC_CTYPE_LOWER: + case ONIGENC_CTYPE_PUNCT: + case ONIGENC_CTYPE_SPACE: + case ONIGENC_CTYPE_UPPER: + case ONIGENC_CTYPE_XDIGIT: + case ONIGENC_CTYPE_ASCII: + case ONIGENC_CTYPE_ALNUM: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + case ONIGENC_CTYPE_GRAPH: + case ONIGENC_CTYPE_PRINT: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + break; + + case ONIGENC_CTYPE_WORD: + if (not == 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_SB_WORD(enc, c) && ! 
ONIGENC_IS_MBC_HEAD(enc, c)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; } + + return r; +} + +static int +parse_ctype_to_enc_ctype(int pctype, int* not) +{ + int ctype; + + switch (pctype) { + case CTYPE_WORD: + ctype = ONIGENC_CTYPE_WORD; + *not = 0; + break; + case CTYPE_NOT_WORD: + ctype = ONIGENC_CTYPE_WORD; + *not = 1; + break; + case CTYPE_WHITE_SPACE: + ctype = ONIGENC_CTYPE_SPACE; + *not = 0; + break; + case CTYPE_NOT_WHITE_SPACE: + ctype = ONIGENC_CTYPE_SPACE; + *not = 1; + break; + case CTYPE_DIGIT: + ctype = ONIGENC_CTYPE_DIGIT; + *not = 0; + break; + case CTYPE_NOT_DIGIT: + ctype = ONIGENC_CTYPE_DIGIT; + *not = 1; + break; + default: + return ONIGERR_PARSER_BUG; + break; + } + return ctype; } typedef struct { - UChar *name; - int (*pf)(RegCharEncoding, UChar); + UChar *name; + int ctype; short int len; } PosixBracketEntryType; @@ -3037,24 +3389,24 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) #define POSIX_BRACKET_NAME_MAX_LEN 6 static PosixBracketEntryType PBS[] = { - { "alnum", is_code_alnum, 5 }, - { "alpha", is_code_alpha, 5 }, - { "blank", is_code_blank, 5 }, - { "cntrl", is_code_cntrl, 5 }, - { "digit", is_code_digit, 5 }, - { "graph", is_code_graph, 5 }, - { "lower", is_code_lower, 5 }, - { "print", is_code_print, 5 }, - { "punct", is_code_punct, 5 }, - { "space", is_code_space, 5 }, - { "upper", is_code_upper, 5 }, - { "xdigit", is_code_xdigit, 6 }, - { "ascii", is_code_ascii, 5 }, /* I don't know origin. Perl? 
*/ - { (UChar* )NULL, is_code_alnum, 0 } + { "alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { "alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { "blank", ONIGENC_CTYPE_BLANK, 5 }, + { "cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { "digit", ONIGENC_CTYPE_DIGIT, 5 }, + { "graph", ONIGENC_CTYPE_GRAPH, 5 }, + { "lower", ONIGENC_CTYPE_LOWER, 5 }, + { "print", ONIGENC_CTYPE_PRINT, 5 }, + { "punct", ONIGENC_CTYPE_PUNCT, 5 }, + { "space", ONIGENC_CTYPE_SPACE, 5 }, + { "upper", ONIGENC_CTYPE_UPPER, 5 }, + { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? */ + { (UChar* )NULL, -1, 0 } }; PosixBracketEntryType *pb; - int not, i, c; + int not, i, c, r; UChar *p = *src; if (PPEEK == '^') { @@ -3068,12 +3420,14 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) goto not_posix_bracket; for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { - if (k_strncmp(p, pb->name, pb->len) == 0) { + if (onig_strncmp(p, pb->name, pb->len) == 0) { p += pb->len; if (end - p < 2 || *p != ':' || *(p+1) != ']') - return REGERR_INVALID_POSIX_BRACKET_TYPE; + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + + r = add_ctype_to_cc(cc, pb->ctype, not, env); + if (r != 0) return r; - bitset_by_pred_func(cc->bs, pb->pf, env->enc, not); PINC; PINC; *src = p; return 0; @@ -3092,13 +3446,89 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) if (!PEND) { PFETCH(c); if (c == ']') - return REGERR_INVALID_POSIX_BRACKET_TYPE; + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; } } return 1; /* 1: is not POSIX bracket, but no error. 
*/ } +static int +property_name_to_ctype(UChar* p, UChar* end) +{ + static PosixBracketEntryType PBS[] = { + { "Alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { "Alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { "Blank", ONIGENC_CTYPE_BLANK, 5 }, + { "Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { "Digit", ONIGENC_CTYPE_DIGIT, 5 }, + { "Graph", ONIGENC_CTYPE_GRAPH, 5 }, + { "Lower", ONIGENC_CTYPE_LOWER, 5 }, + { "Print", ONIGENC_CTYPE_PRINT, 5 }, + { "Punct", ONIGENC_CTYPE_PUNCT, 5 }, + { "Space", ONIGENC_CTYPE_SPACE, 5 }, + { "Upper", ONIGENC_CTYPE_UPPER, 5 }, + { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { "ASCII", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )NULL, -1, 0 } + }; + + PosixBracketEntryType *pb; + int len; + + len = end - p; + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (len == pb->len && onig_strncmp(p, pb->name, pb->len) == 0) + return pb->ctype; + } + + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +} + +static int +fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) +{ + int ctype; + UChar *prev, *p = *src; + int c = 0; + + while (!PEND) { + prev = p; + PFETCH(c); + if (c == '}') { + ctype = property_name_to_ctype(*src, prev); + if (ctype < 0) return ctype; + + *src = p; + return ctype; + } + else if (c == '(' || c == ')' || c == '{' || c == '|') + break; + } + + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +} + +static int +parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, ctype; + CClassNode* cc; + + ctype = fetch_char_property_to_ctype(src, end, env); + if (ctype < 0) return ctype; + + *np = node_new_cclass(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + cc = &(NCCLASS(*np)); + r = add_ctype_to_cc(cc, ctype, 0, env); + if (r != 0) return r; + if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc); + + return 0; +} + enum CCSTATE { CCS_VALUE, @@ -3109,79 +3539,36 @@ enum CCSTATE { enum CCVALTYPE { CCV_SB, - CCV_WC, + CCV_CODE_POINT, CCV_CLASS }; static int -next_state_class(CClassNode* cc, RegToken* tok, WCINT* vs, - enum 
CCVALTYPE* type, enum CCSTATE* state, ScanEnv* env) +next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) { - int r, c; + int r; if (*state == CCS_RANGE) - return REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; + return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; if (*state == CCS_VALUE && *type != CCV_CLASS) { if (*type == CCV_SB) BITSET_SET_BIT(cc->bs, (int )(*vs)); - else if (*type == CCV_WC) { - r = add_wc_range(&(cc->mbuf), env, *vs, *vs); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); if (r < 0) return r; } } - if (tok->type == TK_CHAR_TYPE) { - switch (tok->u.subtype) { - case CTYPE_WORD: - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_WORD(env->enc, c)) BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(env->enc, cc->mbuf); - break; - case CTYPE_NOT_WORD: - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! IS_CODE_WORD(env->enc, c)) BITSET_SET_BIT(cc->bs, c); - } - break; - case CTYPE_WHITE_SPACE: - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_SPACE(env->enc, c)) BITSET_SET_BIT(cc->bs, c); - } - break; - case CTYPE_NOT_WHITE_SPACE: - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! IS_CODE_SPACE(env->enc, c)) BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(env->enc, cc->mbuf); - break; - case CTYPE_DIGIT: - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_DIGIT(env->enc, c)) BITSET_SET_BIT(cc->bs, c); - } - break; - case CTYPE_NOT_DIGIT: - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! 
IS_CODE_DIGIT(env->enc, c)) BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(env->enc, cc->mbuf); - break; - default: - return REGERR_PARSER_BUG; - break; - } - } - else { /* TK_POSIX_BRACKET_OPEN */ - /* nothing */ - } - *state = CCS_VALUE; *type = CCV_CLASS; return 0; } static int -next_state_val(CClassNode* cc, WCINT *vs, WCINT v, int* vs_israw, int v_israw, +next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, + int* vs_israw, int v_israw, enum CCVALTYPE intype, enum CCVALTYPE* type, enum CCSTATE* state, ScanEnv* env) { @@ -3191,8 +3578,8 @@ next_state_val(CClassNode* cc, WCINT *vs, WCINT v, int* vs_israw, int v_israw, case CCS_VALUE: if (*type == CCV_SB) BITSET_SET_BIT(cc->bs, (int )(*vs)); - else if (*type == CCV_WC) { - r = add_wc_range(&(cc->mbuf), env, *vs, *vs); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); if (r < 0) return r; } break; @@ -3200,55 +3587,28 @@ next_state_val(CClassNode* cc, WCINT *vs, WCINT v, int* vs_israw, int v_israw, case CCS_RANGE: if (intype == *type) { if (intype == CCV_SB) { - if (IS_IGNORECASE(env->option) && (*vs_israw == 0 && v_israw == 0)) { - int low, high; - - low = TOLOWER(env->enc, *vs); - high = TOLOWER(env->enc, v); - if (low > high) { - if (IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return REGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - - if (low < 'A' && high >= 'a' && high <= 'z') { - bitset_set_range(cc->bs, low, (int )'A' - 1); - bitset_set_range(cc->bs, (int )'a', high); - } - else if (high > 'z' && low >= 'a' && low <= 'z') { - bitset_set_range(cc->bs, low, (int )'z'); - bitset_set_range(cc->bs, (int )'z' + 1, high); - } - else { - bitset_set_range(cc->bs, low, high); - } - } - else { - if (*vs > v) { - if (IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return REGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(cc->bs, (int )*vs, (int )v); + if (*vs > v) { + if 
(IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } + bitset_set_range(cc->bs, (int )*vs, (int )v); } else { - r = add_wc_range(&(cc->mbuf), env, *vs, v); + r = add_code_range(&(cc->mbuf), env, *vs, v); if (r < 0) return r; } } else { -#ifndef REG_RUBY_M17N - if (env->enc == REGCODE_UTF8 && intype == CCV_WC && *type == CCV_SB) { + if (intype == CCV_CODE_POINT && *type == CCV_SB && + ONIGENC_IS_CONTINUOUS_SB_MB(env->enc)) { bitset_set_range(cc->bs, (int )*vs, 0x7f); - r = add_wc_range(&(cc->mbuf), env, (WCINT )0x80, v); + r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )0x80, v); if (r < 0) return r; } else -#endif - return REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; + return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; } ccs_range_end: *state = CCS_COMPLETE; @@ -3271,7 +3631,7 @@ next_state_val(CClassNode* cc, WCINT *vs, WCINT v, int* vs_israw, int v_israw, static int char_exist_check(UChar c, UChar* from, UChar* to, int ignore_escaped, - RegCharEncoding enc) + OnigEncoding enc) { int in_esc; UChar* p = from; @@ -3283,19 +3643,19 @@ char_exist_check(UChar c, UChar* from, UChar* to, int ignore_escaped, } else { if (*p == c) return 1; - if (*p == '\\') in_esc = 1; + if (*p == MC_ESC) in_esc = 1; } - p += mblen(enc, *p); + p += enc_len(enc, *p); } return 0; } static int -parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, +parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) { int r, neg, len, fetched, and_start; - WCINT v, vs; + OnigCodePoint v, vs; UChar *p; Node* node; CClassNode *cc, *prev_cc; @@ -3305,9 +3665,10 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, enum CCVALTYPE val_type, in_type; int val_israw, in_israw; + prev_cc = (CClassNode* )NULL; *np = NULL_NODE; r = fetch_token_in_cc(tok, src, end, env); - if (r == TK_BYTE && tok->u.c == '^') { + if (r == TK_BYTE && tok->u.c == '^' && tok->escaped == 0) { 
neg = 1; r = fetch_token_in_cc(tok, src, end, env); } @@ -3318,16 +3679,15 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, if (r < 0) return r; if (r == TK_CC_CLOSE) { if (! char_exist_check(']', *src, env->pattern_end, 1, env->enc)) - return REGERR_EMPTY_CHAR_CLASS; + return ONIGERR_EMPTY_CHAR_CLASS; CC_ESC_WARN(env, "]"); r = tok->type = TK_BYTE; /* allow []...] */ } *np = node = node_new_cclass(); - CHECK_NULL_RETURN_VAL(node, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); cc = &(NCCLASS(node)); - prev_cc = (CClassNode* )NULL; and_start = 0; state = CCS_START; @@ -3336,32 +3696,34 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, fetched = 0; switch (r) { case TK_BYTE: - len = mblen(env->enc, tok->u.c); + len = enc_len(env->enc, tok->u.c); if (len > 1) { PUNFETCH; - v = MB2WC(p, end, env->enc); + v = ONIGENC_MBC_TO_CODE(env->enc, p, end); p += len; + in_type = CCV_CODE_POINT; } else { sb_char: - v = (WCINT )tok->u.c; + v = (OnigCodePoint )tok->u.c; + in_type = CCV_SB; } in_israw = 0; - goto val_entry; + goto val_entry2; break; case TK_RAW_BYTE: - len = mblen(env->enc, tok->u.c); + len = enc_len(env->enc, tok->u.c); if (len > 1 && tok->base != 0) { /* tok->base != 0 : octal or hexadec. 
*/ - UChar buf[WC2MB_MAX_BUFLEN]; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; UChar* bufp = buf; - UChar* bufe = buf + WC2MB_MAX_BUFLEN; + UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; int i, base = tok->base; - if (len > WC2MB_MAX_BUFLEN) { + if (len > ONIGENC_CODE_TO_MBC_MAXLEN) { bufp = (UChar* )xmalloc(len); if (IS_NULL(bufp)) { - r = REGERR_MEMORY; + r = ONIGERR_MEMORY; goto err; } bufe = bufp + len; @@ -3374,27 +3736,34 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, bufp[i] = tok->u.c; } if (i < len) { - r = REGERR_TOO_SHORT_MULTI_BYTE_STRING; + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; raw_byte_err: if (bufp != buf) xfree(bufp); goto err; } - v = MB2WC(bufp, bufe, env->enc); - fetched = 1; + v = ONIGENC_MBC_TO_CODE(env->enc, bufp, bufe); if (bufp != buf) xfree(bufp); + in_type = CCV_CODE_POINT; } else { - v = (WCINT )tok->u.c; + v = (OnigCodePoint )tok->u.c; + in_type = CCV_SB; } in_israw = 1; - goto val_entry; + goto val_entry2; break; - case TK_WC: - v = tok->u.wc; + case TK_CODE_POINT: + v = tok->u.code; in_israw = 1; val_entry: - in_type = (v < SINGLE_BYTE_SIZE ? CCV_SB : CCV_WC); + len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); + if (len < 0) { + r = len; + goto err; + } + in_type = (len == 1 ? 
CCV_SB : CCV_CODE_POINT); + val_entry2: r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, &state, env); if (r != 0) goto err; @@ -3406,16 +3775,38 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, if (r == 1) { /* is not POSIX bracket */ CC_ESC_WARN(env, "["); p = tok->backp; - v = (WCINT )tok->u.c; + v = (OnigCodePoint )tok->u.c; in_israw = 0; goto val_entry; } - /* POSIX bracket fall */ + goto next_class; + break; + case TK_CHAR_TYPE: - r = next_state_class(cc, tok, &vs, &val_type, &state, env); + { + int ctype, not; + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not); + r = add_ctype_to_cc(cc, ctype, not, env); + if (r != 0) return r; + } + + next_class: + r = next_state_class(cc, &vs, &val_type, &state, env); if (r != 0) goto err; break; + case TK_CHAR_PROPERTY: + { + int ctype; + + ctype = fetch_char_property_to_ctype(&p, end, env); + if (ctype < 0) return ctype; + r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); + if (r != 0) return r; + goto next_class; + } + break; + case TK_CC_RANGE: if (state == CCS_VALUE) { r = fetch_token_in_cc(tok, &p, end, env); @@ -3423,7 +3814,7 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, fetched = 1; if (r == TK_CC_CLOSE) { /* allow [x-] */ range_end_val: - v = (WCINT )'-'; + v = (OnigCodePoint )'-'; in_israw = 0; goto val_entry; } @@ -3435,7 +3826,7 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, } else if (state == CCS_START) { /* [-xa] is allowed */ - v = (WCINT )tok->u.c; + v = (OnigCodePoint )tok->u.c; in_israw = 0; r = fetch_token_in_cc(tok, &p, end, env); @@ -3461,11 +3852,11 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, goto range_end_val; } - if (IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_RANGE_OP_IN_CC)) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { CC_ESC_WARN(env, "-"); goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ } - r = 
REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; goto err; } break; @@ -3478,10 +3869,10 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, r = parse_char_class(&anode, tok, &p, end, env); if (r != 0) goto cc_open_err; acc = &(NCCLASS(anode)); - r = or_cclass(cc, acc); + r = or_cclass(cc, acc, env->enc); + onig_node_free(anode); cc_open_err: - regex_node_free(anode); if (r != 0) goto err; } break; @@ -3498,8 +3889,9 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, state = CCS_START; if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc); + r = and_cclass(prev_cc, cc, env->enc); if (r != 0) goto err; + bbuf_free(cc->mbuf); } else { prev_cc = cc; @@ -3510,11 +3902,11 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, break; case TK_EOT: - r = REGERR_PREMATURE_END_OF_CHAR_CLASS; + r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS; goto err; break; default: - r = REGERR_PARSER_BUG; + r = ONIGERR_PARSER_BUG; goto err; break; } @@ -3534,55 +3926,60 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end, } if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc); + r = and_cclass(prev_cc, cc, env->enc); if (r != 0) goto err; + bbuf_free(cc->mbuf); cc = prev_cc; } cc->not = neg; if (cc->not != 0 && - IS_SYNTAX_BV(env->syntax, REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { + IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { int is_empty; is_empty = (IS_NULL(cc->mbuf) ? 
1 : 0); if (is_empty != 0) BITSET_IS_EMPTY(cc->bs, is_empty); if (is_empty == 0) - BITSET_SET_BIT(cc->bs, NEWLINE); + BITSET_SET_BIT(cc->bs, ONIG_NEWLINE); } *src = p; return 0; err: - regex_node_free(*np); + if (cc != &(NCCLASS(*np))) + bbuf_free(cc->mbuf); + onig_node_free(*np); return r; } -static int parse_subexp(Node** top, RegToken* tok, int term, +static int parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env); static int -parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end, +parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) { Node *target; - RegOptionType option; + OnigOptionType option; int r, c, num; + int list_capture; UChar* p = *src; *np = NULL; - if (PEND) return REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; + if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; option = env->option; - if (PPEEK == '?' && IS_SYNTAX_OP(env->syntax, REG_SYN_OP_SUBEXP_EFFECT)) { + if (PPEEK == '?' && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { PINC; - if (PEND) return REGERR_END_PATTERN_IN_GROUP; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; PFETCH(c); switch (c) { case '#': /* (?#...) comment */ while (1) { - if (PEND) return REGERR_END_PATTERN_IN_GROUP; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; PFETCH(c); if (c == ')') break; } @@ -3590,15 +3987,21 @@ parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end, return 3; /* 3: comment */ break; - case ':': /* (?:...) grouping only */ - goto group; + case ':': /* (?:...) 
grouping only */ + group: + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(np, tok, term, &p, end, env); + if (r < 0) return r; + *src = p; + return 1; /* group */ break; case '=': - *np = regex_node_new_anchor(ANCHOR_PREC_READ); + *np = onig_node_new_anchor(ANCHOR_PREC_READ); break; case '!': /* preceding read */ - *np = regex_node_new_anchor(ANCHOR_PREC_READ_NOT); + *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); break; case '>': /* (?>...) stop backtrack */ *np = node_new_effect(EFFECT_STOP_BACKTRACK); @@ -3607,29 +4010,70 @@ parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end, case '<': /* look behind (?<=...), (?syntax, REG_SYN_OP2_NAMED_SUBEXP)) { + *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT); +#ifdef USE_NAMED_GROUP + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { UChar *name; UChar *name_end; + PUNFETCH; + list_capture = 0; + + named_group: name = p; - r = fetch_name(&p, end, &name_end, env); + r = fetch_name(&p, end, &name_end, env, 0); if (r < 0) return r; - *np = node_new_effect(EFFECT_MEMORY); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); num = scan_env_add_mem_entry(env); if (num < 0) return num; - NEFFECT(*np).regnum = num; - r = name_add(env->reg, name, name_end, num); + if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM) + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + + r = name_add(env->reg, name, name_end, num, env); if (r != 0) return r; + *np = node_new_effect_memory(env->option, 1); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + NEFFECT(*np).regnum = num; + if (list_capture != 0) + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + env->num_named++; } #endif else - return REGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case '@': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { +#ifdef USE_NAMED_GROUP + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + PFETCH(c); + 
if (c == '<') { + list_capture = 1; + goto named_group; /* (?@<name>...) */ + } + PUNFETCH; + } +#endif + *np = node_new_effect_memory(env->option, 0); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + num = scan_env_add_mem_entry(env); + if (num < 0) { + onig_node_free(*np); + return num; + } + else if (num >= BIT_STATUS_BITS_NUM) { + onig_node_free(*np); + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + } + NEFFECT(*np).regnum = num; + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } break; #ifdef USE_POSIXLINE_OPTION @@ -3646,100 +4090,80 @@ parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end, break; case '-': neg = 1; break; - case 'x': ONOFF(option, REG_OPTION_EXTEND, neg); break; - case 'i': ONOFF(option, REG_OPTION_IGNORECASE, neg); break; + case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; + case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; case 's': - if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, REG_OPTION_MULTILINE, neg); + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); } else - return REGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; break; case 'm': - if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, REG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0)); + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 
1 : 0)); } - else if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_OPTION_RUBY)) { - ONOFF(option, REG_OPTION_MULTILINE, neg); + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); } else - return REGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; break; #ifdef USE_POSIXLINE_OPTION case 'p': - ONOFF(option, REG_OPTION_MULTILINE|REG_OPTION_SINGLELINE, neg); + ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); break; #endif default: - return REGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; } - if (c == ')') { /* option only */ - if (option == env->option) { - *np = node_new_empty(); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - *src = p; - return 0; - } - else { - *np = node_new_option(option); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - *src = p; - return 2; /* option only */ - } + if (c == ')') { + *np = node_new_option(option); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *src = p; + return 2; /* option only */ } else if (c == ':') { - if (env->option == option) { - group: - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(np, tok, term, &p, end, env); - if (r < 0) return r; - *src = p; - return 1; /* group */ - } - else { - RegOptionType prev = env->option; - - env->option = option; - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); - env->option = prev; - if (r < 0) return r; - *np = node_new_option(option); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - NEFFECT(*np).target = target; - *src = p; - return 0; - } + OnigOptionType prev = env->option; + + env->option = option; + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, &p, end, env); + env->option = prev; + if (r < 0) return r; + *np = node_new_option(option); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + NEFFECT(*np).target = target; + *src = p; + 
return 0; } - if (PEND) return REGERR_END_PATTERN_IN_GROUP; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; PFETCH(c); } } break; default: - return REGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; } } else { -#ifdef USE_NAMED_SUBEXP - if (IS_REG_OPTION_ON(env->option, REG_OPTION_CAPTURE_ONLY_NAMED_GROUP)) { +#ifdef USE_NAMED_GROUP + if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; - } #endif - *np = node_new_effect(EFFECT_MEMORY); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); + *np = node_new_effect_memory(env->option, 0); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); num = scan_env_add_mem_entry(env); if (num < 0) return num; NEFFECT(*np).regnum = num; } - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); r = fetch_token(tok, &p, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, &p, end, env); @@ -3747,8 +4171,14 @@ parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end, if (NTYPE(*np) == N_ANCHOR) NANCHOR(*np).target = target; - else + else { NEFFECT(*np).target = target; + if (NEFFECT(*np).type == EFFECT_MEMORY) { + /* Don't move this to previous of parse_subexp() */ + r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np); + if (r != 0) return r; + } + } *src = p; return 0; @@ -3784,16 +4214,19 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) QualifierNode* qnt = &(NQUALIFIER(target)); #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (qn->by_number == 0 && qnt->by_number == 0) { + if (qn->by_number == 0 && qnt->by_number == 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { if (IS_REPEAT_INFINITE(qn->upper)) { if (qn->lower == 0) { /* '*' */ redundant: { char buf[WARN_BUFSIZE]; - regex_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - "redundant nested repeat operator"); - VERB_WARNING(buf); + if (onig_verb_warn != onig_null_warn) { + 
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + "redundant nested repeat operator"); + (*onig_verb_warn)(buf); + } goto warn_exit; } } @@ -3816,13 +4249,11 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) } } } -#endif -#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR warn_exit: #endif if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) { - reduce_nested_qualifier(qnode, target); + onig_reduce_nested_qualifier(qnode, target); goto q_exit; } } @@ -3837,12 +4268,82 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) return 0; } +#ifdef USE_FOLD_MATCH +static int +make_alt_node_from_fold_info(OnigEncFoldMatchInfo* info, Node** node) +{ + int i; + UChar *s, *end; + Node *root, **ptail, *snode; + + ptail = &root; + for (i = 0; i < info->target_num; i++) { + s = info->target_str[i]; + end = s + info->target_byte_len[i]; + /* ex. + U+00DF match "ss" and "SS, but not match "Ss". + So, string nodes must be raw. 
+ */ + snode = node_new_str_raw(s, end); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + + *ptail = node_new_alt(snode, NULL_NODE); + CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY); + ptail = &(NCONS(*ptail).right); + } + *ptail = NULL_NODE; + *node = root; + return 0; +} + +static int +make_fold_alt_node_from_cc(OnigEncoding enc, CClassNode* cc, Node** root) +{ + int i, j, flen, len, ncode, n; + UChar *s, *end, buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + OnigCodePoint* codes; + Node **ptail, *snode; + OnigEncFoldMatchInfo* info; + + *root = NULL_NODE; + ptail = root; + + ncode = ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc, &codes); + n = 0; + for (i = 0; i < ncode; i++) { + if (onig_is_code_in_cc(enc, codes[i], cc)) { + len = ONIGENC_CODE_TO_MBC(enc, codes[i], buf); + flen = ONIGENC_GET_FOLD_MATCH_INFO(enc, buf, buf + len, &info); + if (flen > 0) { /* fold */ + for (j = 0; j < info->target_num; j++) { + s = info->target_str[j]; + end = s + info->target_byte_len[j]; + if (onig_strncmp(s, buf, enc_len(enc, *s)) == 0) + continue; /* ignore single char. 
*/ + + snode = node_new_str_raw(s, end); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + + *ptail = node_new_alt(snode, NULL_NODE); + CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY); + ptail = &(NCONS(*ptail).right); + n++; + } + } + } + } + + return n; +} +#endif + static int -parse_exp(Node** np, RegToken* tok, int term, +parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) { - int r, len, c, group = 0; + int r, len, group = 0; Node* qn; + Node** targetp; start: *np = NULL; @@ -3863,9 +4364,13 @@ parse_exp(Node** np, RegToken* tok, int term, if (r == 1) group = 1; else if (r == 2) { /* option only */ Node* target; + OnigOptionType prev = env->option; + + env->option = NEFFECT(*np).option; r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, src, end, env); + env->option = prev; if (r < 0) return r; NEFFECT(*np).target = target; return tok->type; @@ -3875,17 +4380,11 @@ parse_exp(Node** np, RegToken* tok, int term, if (r < 0) return r; goto start; } - else { - if (NTYPE(*np) == N_EFFECT && NEFFECT(*np).type == EFFECT_MEMORY) { - r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np); - if (r != 0) return r; - } - } break; case TK_SUBEXP_CLOSE: - if (! IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP)) - return REGERR_UNMATCHED_CLOSE_PARENTHESIS; + if (! 
IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP)) + return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS; if (tok->escaped) goto tk_raw_byte; else goto tk_byte; @@ -3893,58 +4392,124 @@ parse_exp(Node** np, RegToken* tok, int term, case TK_BYTE: tk_byte: - *np = node_new_str_char((UChar )tok->u.c); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - len = mblen(env->enc, tok->u.c); - if (len > 1) { - regex_node_str_cat(*np, *src, *src + len - 1); - *src += (len - 1); - } - while (1) { - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r != TK_BYTE) goto repeat; + { + *np = node_new_str_char((UChar )tok->u.c); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - r = node_str_cat_char(*np, (UChar )tok->u.c); - if (r < 0) return r; - len = mblen(env->enc, tok->u.c); - if (len > 1) { - regex_node_str_cat(*np, *src, *src + len - 1); - *src += (len - 1); + while (1) { + len = enc_len(env->enc, tok->u.c); + if (len > 1) { + r = onig_node_str_cat(*np, *src, *src + len - 1); + if (r < 0) return r; + *src += (len - 1); + } + + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_BYTE) break; + + r = node_str_cat_char(*np, (UChar )tok->u.c); + if (r < 0) return r; } + + fold_entry: +#ifdef USE_FOLD_MATCH + if (IS_IGNORECASE(env->option) && ONIGENC_IS_FOLD_MATCH(env->enc)) { + int flen, ret; + Node *root, **ptail, *work, *snode, *anode; + UChar *p, *pprev; + OnigEncFoldMatchInfo* fold_info; + StrNode* sn = &(NSTRING(*np)); + + ptail = &root; + pprev = sn->s; + for (p = sn->s; p < sn->end; ) { + flen = ONIGENC_GET_FOLD_MATCH_INFO(env->enc, p, sn->end, &fold_info); + if (flen > 0) { /* fold */ + ret = make_alt_node_from_fold_info(fold_info, &anode); + if (ret != 0) return ret; + work = node_new_list(anode, NULL); + CHECK_NULL_RETURN_VAL(work, ONIGERR_MEMORY); + + if (pprev < p) { + snode = node_new_str(pprev, p); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + *ptail = node_new_list(snode, work); + CHECK_NULL_RETURN_VAL(*ptail, 
ONIGERR_MEMORY); + } + else { + *ptail = work; + } + ptail = &(NCONS(work).right); + p += flen; + pprev = p; + } + else + p += enc_len(env->enc, *p); + } + *ptail = NULL_NODE; + if (IS_NOT_NULL(root)) { + if (pprev < sn->end) { + snode = node_new_str(pprev, sn->end); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + *ptail = node_new_list(snode, NULL_NODE); + CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY); + } + onig_node_free(*np); + *np = root; + } + } +#endif + targetp = np; + goto repeat; } break; case TK_RAW_BYTE: tk_raw_byte: - *np = node_new_str_raw_char((UChar )tok->u.c); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - while (1) { - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r != TK_RAW_BYTE) goto repeat; + { + int expect_len; - r = node_str_cat_char(*np, (UChar )tok->u.c); - if (r < 0) return r; + *np = node_new_str_raw_char((UChar )tok->u.c); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + expect_len = enc_len(env->enc, tok->u.c); + len = 1; + while (1) { + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_RAW_BYTE) { +#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG + if (len >= expect_len) { + NSTRING_CLEAR_RAW(*np); + } +#endif + goto fold_entry; + } + + r = node_str_cat_char(*np, (UChar )tok->u.c); + if (r < 0) return r; + len++; + } } break; - case TK_WC: + case TK_CODE_POINT: { - UChar buf[WC2MB_MAX_BUFLEN]; - UChar* bufs = buf; - UChar* bufe = bufs + WC2MB_MAX_BUFLEN; - int num = wc2mb_buf(tok->u.wc, &bufs, &bufe, env->enc); + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); if (num < 0) return num; - *np = node_new_str_raw(bufs, bufe); - if (bufs != buf) xfree(bufs); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); +#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG + *np = node_new_str_raw(buf, buf + num); +#else + *np = node_new_str(buf, buf + num); +#endif + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); } break; case TK_QUOTE_OPEN: { - WCINT end_op[] = { (WCINT )'\\', (WCINT 
)'E' }; + OnigCodePoint end_op[] = { (OnigCodePoint )MC_ESC, (OnigCodePoint )'E' }; UChar *qstart, *qend, *nextp; qstart = *src; @@ -3953,90 +4518,113 @@ parse_exp(Node** np, RegToken* tok, int term, nextp = qend = end; } *np = node_new_str(qstart, qend); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); *src = nextp; } break; case TK_CHAR_TYPE: - switch (tok->u.subtype) { - case CTYPE_WORD: - case CTYPE_NOT_WORD: - *np = node_new_ctype(tok->u.subtype); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - break; - - case CTYPE_WHITE_SPACE: - *np = node_new_cclass(); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_SPACE(env->enc, c)) BITSET_SET_BIT(NCCLASS(*np).bs, c); - } - break; - - case CTYPE_NOT_WHITE_SPACE: - *np = node_new_cclass(); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! IS_CODE_SPACE(env->enc, c)) BITSET_SET_BIT(NCCLASS(*np).bs, c); - } - break; + { + switch (tok->u.subtype) { + case CTYPE_WORD: + case CTYPE_NOT_WORD: + *np = node_new_ctype(tok->u.subtype); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + break; - case CTYPE_DIGIT: - *np = node_new_cclass(); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_DIGIT(env->enc, c)) BITSET_SET_BIT(NCCLASS(*np).bs, c); - } - break; + case CTYPE_WHITE_SPACE: + case CTYPE_NOT_WHITE_SPACE: + case CTYPE_DIGIT: + case CTYPE_NOT_DIGIT: + { + CClassNode* cc; + int ctype, not; + + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); + + *np = node_new_cclass(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + cc = &(NCCLASS(*np)); + add_ctype_to_cc(cc, ctype, 0, env); + if (not != 0) CCLASS_SET_NOT(cc); + } + break; - case CTYPE_NOT_DIGIT: - *np = node_new_cclass(); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! 
IS_CODE_DIGIT(env->enc, c)) BITSET_SET_BIT(NCCLASS(*np).bs, c); + default: + return ONIGERR_PARSER_BUG; + break; } - break; - - default: - return REGERR_PARSER_BUG; - break; } break; + case TK_CHAR_PROPERTY: + r = parse_char_property(np, tok, src, end, env); + if (r != 0) return r; + break; + case TK_CC_OPEN: r = parse_char_class(np, tok, src, end, env); if (r != 0) return r; + +#ifdef USE_FOLD_MATCH + if (IS_IGNORECASE(env->option) && ONIGENC_IS_FOLD_MATCH(env->enc)) { + int res; + Node *alt_root, *work; + CClassNode* cc = &(NCCLASS(*np)); + + res = make_fold_alt_node_from_cc(env->enc, cc, &alt_root); + if (res < 0) return res; + if (res > 0) { + work = node_new_alt(*np, alt_root); + if (IS_NULL(work)) { + onig_node_free(alt_root); + return ONIGERR_MEMORY; + } + *np = work; + } + } +#endif break; case TK_ANYCHAR: *np = node_new_anychar(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + break; + + case TK_ANYCHAR_ANYTIME: + *np = node_new_anychar(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + qn = node_new_qualifier(0, REPEAT_INFINITE, 0); + CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); + NQUALIFIER(qn).target = *np; + *np = qn; break; case TK_BACKREF: len = tok->u.backref.num; *np = node_new_backref(len, - (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), env); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); + (len > 1 ? 
tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, env); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); break; #ifdef USE_SUBEXP_CALL case TK_CALL: *np = node_new_call(tok->u.call.name, tok->u.call.name_end); - CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); env->num_call++; break; #endif case TK_ANCHOR: - *np = regex_node_new_anchor(tok->u.anchor); + *np = onig_node_new_anchor(tok->u.anchor); break; case TK_OP_REPEAT: case TK_INTERVAL: - if (IS_SYNTAX_BV(env->syntax, REG_SYN_CONTEXT_INDEP_OPS)) { - if (IS_SYNTAX_BV(env->syntax, REG_SYN_CONTEXT_INVALID_OPS)) - return REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; else *np = node_new_empty(); } @@ -4047,50 +4635,58 @@ parse_exp(Node** np, RegToken* tok, int term, break; default: - return REGERR_PARSER_BUG; + return ONIGERR_PARSER_BUG; break; } - re_entry: - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - - repeat: - if (r == TK_OP_REPEAT || r == TK_INTERVAL) { - if (is_invalid_qualifier_target(*np)) - return REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; + { + targetp = np; - qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper, - (r == TK_INTERVAL ? 
1 : 0)); - CHECK_NULL_RETURN_VAL(qn, REGERR_MEMORY); - NQUALIFIER(qn).greedy = tok->u.repeat.greedy; - r = set_qualifier(qn, *np, group, env); + re_entry: + r = fetch_token(tok, src, end, env); if (r < 0) return r; - if (tok->u.repeat.possessive != 0) { - Node* en; - en = node_new_effect(EFFECT_STOP_BACKTRACK); - CHECK_NULL_RETURN_VAL(en, REGERR_MEMORY); - NEFFECT(en).target = qn; - qn = en; - } + repeat: + if (r == TK_OP_REPEAT || r == TK_INTERVAL) { + if (is_invalid_qualifier_target(*targetp)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; - if (r == 0) { - *np = qn; - } - else if (r == 2) { /* split case: /abc+/ */ - Node* target = *np; - *np = node_new_list(target, NULL); - NCONS(*np).right = node_new_list(qn, NULL); + qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper, + (r == TK_INTERVAL ? 1 : 0)); + CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); + NQUALIFIER(qn).greedy = tok->u.repeat.greedy; + r = set_qualifier(qn, *targetp, group, env); + if (r < 0) return r; + + if (tok->u.repeat.possessive != 0) { + Node* en; + en = node_new_effect(EFFECT_STOP_BACKTRACK); + CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); + NEFFECT(en).target = qn; + qn = en; + } + + if (r == 0) { + *targetp = qn; + } + else if (r == 2) { /* split case: /abc+/ */ + Node *tmp; + + *targetp = node_new_list(*targetp, NULL); + CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY); + tmp = NCONS(*targetp).right = node_new_list(qn, NULL); + CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY); + targetp = &(NCONS(tmp).left); + } + goto re_entry; } - goto re_entry; } return r; } static int -parse_branch(Node** top, RegToken* tok, int term, +parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) { int r; @@ -4127,7 +4723,7 @@ parse_branch(Node** top, RegToken* tok, int term, /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ static int -parse_subexp(Node** top, RegToken* tok, int term, +parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) { 
int r; @@ -4135,7 +4731,10 @@ parse_subexp(Node** top, RegToken* tok, int term, *top = NULL; r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(node); + return r; + } if (r == term) { *top = node; @@ -4159,9 +4758,9 @@ parse_subexp(Node** top, RegToken* tok, int term, else { err: if (term == TK_SUBEXP_CLOSE) - return REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; + return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; else - return REGERR_PARSER_BUG; + return ONIGERR_PARSER_BUG; } return r; @@ -4171,7 +4770,7 @@ static int parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) { int r; - RegToken tok; + OnigToken tok; r = fetch_token(&tok, src, end, env); if (r < 0) return r; @@ -4181,13 +4780,13 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) } extern int -regex_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg, +onig_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env) { int r; UChar* p; -#ifdef USE_NAMED_SUBEXP +#ifdef USE_NAMED_GROUP names_clear(reg); #endif @@ -4207,7 +4806,7 @@ regex_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg, } extern void -regex_scan_env_set_error_string(ScanEnv* env, int ecode, +onig_scan_env_set_error_string(ScanEnv* env, int ecode, UChar* arg, UChar* arg_end) { env->error = arg; diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h index 5a073623c9..b2726becbd 100644 --- a/ext/mbstring/oniguruma/regparse.h +++ b/ext/mbstring/oniguruma/regparse.h @@ -2,7 +2,7 @@ regparse.h - Oniguruma (regular expression library) - Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #ifndef REGPARSE_H @@ -64,6 +64,7 @@ #define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) #define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW 
+#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW #define NSTRING_SET_CASE_AMBIG(node) (node)->u.str.flag |= NSTR_CASE_AMBIG #define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0) #define NSTRING_IS_CASE_AMBIG(node) \ @@ -72,6 +73,14 @@ #define BACKREFS_P(br) \ (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); +#define CCLASS_SET_NOT(cc) (cc)->not = 1 + +#define NQ_TARGET_ISNOT_EMPTY 0 +#define NQ_TARGET_IS_EMPTY 1 +#define NQ_TARGET_IS_EMPTY_MEM 2 +#define NQ_TARGET_IS_EMPTY_REC 3 + + typedef struct { UChar* s; UChar* end; @@ -92,23 +101,26 @@ typedef struct { int upper; int greedy; int by_number; /* {n,m} */ - int target_may_empty; /* target can match with empty data */ + int target_empty_info; struct _Node* head_exact; struct _Node* next_head_exact; int is_refered; /* include called node. don't eliminate even if {0} */ } QualifierNode; /* status bits */ -#define NST_RECURSION (1<<0) -#define NST_CALLED (1<<1) -#define NST_ADDR_FIXED (1<<2) -#define NST_MIN_FIXED (1<<3) -#define NST_MAX_FIXED (1<<4) -#define NST_CLEN_FIXED (1<<5) -#define NST_MARK1 (1<<6) -#define NST_MARK2 (1<<7) -#define NST_MEM_BACKREFED (1<<8) -#define NST_SIMPLE_REPEAT (1<<9) /* for stop backtrack optimization */ +#define NST_MIN_FIXED (1<<0) +#define NST_MAX_FIXED (1<<1) +#define NST_CLEN_FIXED (1<<2) +#define NST_MARK1 (1<<3) +#define NST_MARK2 (1<<4) +#define NST_MEM_BACKREFED (1<<5) +#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */ + +#define NST_RECURSION (1<<7) +#define NST_CALLED (1<<8) +#define NST_ADDR_FIXED (1<<9) +#define NST_NAMED_GROUP (1<<10) +#define NST_NAME_REF (1<<11) #define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f) #define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f) @@ -122,20 +134,23 @@ typedef struct { #define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) #define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) #define 
IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0) +#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) #define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) +#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) +#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) typedef struct { int state; int type; int regnum; - RegOptionType option; + OnigOptionType option; struct _Node* target; AbsAddrType call_addr; /* for multiple call reference */ - RegDistance min_len; /* min length (byte) */ - RegDistance max_len; /* max length (byte) */ + OnigDistance min_len; /* min length (byte) */ + OnigDistance max_len; /* max length (byte) */ int char_len; /* character length */ int opt_count; /* referenced count in optimize_node_left() */ } EffectNode; @@ -209,10 +224,12 @@ typedef struct _Node { (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) typedef struct { - RegOptionType option; - RegCharEncoding enc; - RegSyntaxType* syntax; - BitStatusType backtrack_mem; + OnigOptionType option; + OnigEncoding enc; + OnigSyntaxType* syntax; + BitStatusType capture_history; + BitStatusType bt_mem_start; + BitStatusType bt_mem_end; BitStatusType backrefed_mem; UChar* pattern; UChar* pattern_end; @@ -224,6 +241,9 @@ typedef struct { UnsetAddrList* unset_addr_list; #endif int num_mem; +#ifdef USE_NAMED_GROUP + int num_named; +#endif int mem_alloc; Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; Node** mem_nodes_dynamic; @@ -234,21 +254,23 @@ typedef struct { #define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) #define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) - -extern void regex_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); -extern int regex_scan_unsigned_number P_((UChar** src, UChar* end, RegCharEncoding enc)); -extern void regex_node_conv_to_str_node P_((Node* node, int raw)); 
-extern int regex_node_str_cat P_((Node* node, UChar* s, UChar* end)); -extern void regex_node_free P_((Node* node)); -extern Node* regex_node_new_effect P_((int type)); -extern Node* regex_node_new_anchor P_((int type)); -extern int regex_free_node_list(); -extern int regex_names_free P_((regex_t* reg)); -extern int regex_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env)); - -#ifdef REG_DEBUG -#ifdef USE_NAMED_SUBEXP -extern int regex_print_names(FILE*, regex_t*); +extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern int onig_strncmp P_((UChar* s1, UChar* s2, int n)); +extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); +extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc)); +extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode)); +extern void onig_node_conv_to_str_node P_((Node* node, int raw)); +extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end)); +extern void onig_node_free P_((Node* node)); +extern Node* onig_node_new_effect P_((int type)); +extern Node* onig_node_new_anchor P_((int type)); +extern int onig_free_node_list(); +extern int onig_names_free P_((regex_t* reg)); +extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env)); + +#ifdef ONIG_DEBUG +#ifdef USE_NAMED_GROUP +extern int onig_print_names(FILE*, regex_t*); #endif #endif diff --git a/ext/mbstring/oniguruma/regposerr.c b/ext/mbstring/oniguruma/regposerr.c index 007e7b65c0..533f813c0c 100644 --- a/ext/mbstring/oniguruma/regposerr.c +++ b/ext/mbstring/oniguruma/regposerr.c @@ -2,7 +2,7 @@ regposerr.c - Oniguruma (regular expression library) - Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #include "config.h" diff --git 
a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c index ad22338132..3604ccfdbf 100644 --- a/ext/mbstring/oniguruma/regposix.c +++ b/ext/mbstring/oniguruma/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) - Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ @@ -17,7 +17,7 @@ #if 1 #define ENC_STRING_LEN(enc,s,len) do { \ UChar* tmps = (UChar* )(s); \ - /* while (*tmps != 0) tmps += mblen(enc,*tmps); */ \ + /* while (*tmps != 0) tmps += enc_len(enc,*tmps); */ \ while (*tmps != 0) tmps++; /* OK for UTF-8, EUC-JP, Shift_JIS */ \ len = tmps - (UChar* )(s); \ } while(0) @@ -34,57 +34,65 @@ static int onig2posix_error_code(int code) { static O2PERR o2p[] = { - { REG_MISMATCH, REG_NOMATCH }, - { REG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, - { REGERR_MEMORY, REG_ESPACE }, - { REGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, - { REGERR_TYPE_BUG, REG_EONIG_INTERNAL }, - { REGERR_PARSER_BUG, REG_EONIG_INTERNAL }, - { REGERR_STACK_BUG, REG_EONIG_INTERNAL }, - { REGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL }, - { REGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL }, - { REGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG }, - { REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG }, - { REGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE }, - { REGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK }, - { REGERR_EMPTY_CHAR_CLASS, REG_ECTYPE }, - { REGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE }, - { REGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE }, - { REGERR_END_PATTERN_AT_META, REG_EESCAPE }, - { REGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE }, - { REGERR_META_CODE_SYNTAX, REG_BADPAT }, - { REGERR_CONTROL_CODE_SYNTAX, REG_BADPAT }, - { REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE }, - { REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE }, - { 
REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE }, - { REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT }, - { REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT }, - { REGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT }, - { REGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN }, - { REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN }, - { REGERR_END_PATTERN_IN_GROUP, REG_BADPAT }, - { REGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT }, - { REGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT }, - { REGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT }, - { REGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT }, - { REGERR_TOO_BIG_NUMBER, REG_BADPAT }, - { REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR }, - { REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR }, - { REGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE }, - { REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE }, - { REGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE }, - { REGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT }, - { REGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG }, - { REGERR_INVALID_BACKREF, REG_ESUBREG }, - { REGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { REGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { REGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { REGERR_INVALID_SUBEXP_NAME, REG_BADPAT }, - { REGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT }, - { REGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT }, - { REGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT }, - { REGERR_NEVER_ENDING_RECURSION, REG_BADPAT }, - { REGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD } + { ONIG_MISMATCH, REG_NOMATCH }, + { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, + { ONIGERR_MEMORY, REG_ESPACE }, + { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, + { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, + { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, + { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, + { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL }, + { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL }, + { 
ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG }, + { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG }, + { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG }, + { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE }, + { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK }, + { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE }, + { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE }, + { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE }, + { ONIGERR_META_CODE_SYNTAX, REG_BADPAT }, + { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT }, + { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE }, + { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE }, + { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT }, + { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT }, + { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT }, + { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN }, + { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN }, + { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT }, + { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT }, + { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT }, + { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT }, + { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT }, + { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT }, + { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR }, + { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR }, + { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE }, + { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE }, + { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT }, + { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG }, + { ONIGERR_INVALID_BACKREF, REG_ESUBREG }, + { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT }, + { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { 
ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { ONIGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT }, + { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT }, + { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT }, + { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT }, + { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT }, + { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT }, + { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT }, + { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT }, + { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT }, + { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT }, + { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD } + }; int i; @@ -103,26 +111,27 @@ extern int regcomp(regex_t* reg, const char* pattern, int posix_options) { int r, len; - RegSyntaxType* syntax = RegDefaultSyntax; - RegOptionType options; + OnigSyntaxType* syntax = OnigDefaultSyntax; + OnigOptionType options; if ((posix_options & REG_EXTENDED) == 0) - syntax = REG_SYNTAX_POSIX_BASIC; + syntax = ONIG_SYNTAX_POSIX_BASIC; options = syntax->options; if ((posix_options & REG_ICASE) != 0) - REG_OPTION_ON(options, REG_OPTION_IGNORECASE); + ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE); if ((posix_options & REG_NEWLINE) != 0) { - REG_OPTION_ON( options, REG_OPTION_NEGATE_SINGLELINE); - REG_OPTION_OFF(options, REG_OPTION_SINGLELINE); + ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE); + ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE); } reg->comp_options = posix_options; - ENC_STRING_LEN(RegDefaultCharEncoding, pattern, len); - r = regex_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len), - options, RegDefaultCharEncoding, syntax, (RegErrorInfo* )NULL); - if (r != REG_NORMAL) { + ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len); + r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len), + options, OnigEncDefaultCharEncoding, syntax, + (OnigErrorInfo* )NULL); + if (r != ONIG_NORMAL) { return 
onig2posix_error_code(r); } @@ -136,11 +145,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch, { int r, i, len; UChar* end; - RegOptionType options; + OnigOptionType options; - options = REG_OPTION_POSIX_REGION; - if ((posix_options & REG_NOTBOL) != 0) options |= REG_OPTION_NOTBOL; - if ((posix_options & REG_NOTEOL) != 0) options |= REG_OPTION_NOTEOL; + options = ONIG_OPTION_POSIX_REGION; + if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL; + if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL; if ((reg->comp_options & REG_NOSUB) != 0) { pmatch = (regmatch_t* )NULL; @@ -149,16 +158,16 @@ regexec(regex_t* reg, const char* str, size_t nmatch, ENC_STRING_LEN(ONIG_C(reg)->code,str,len); end = (UChar* )(str + len); - r = regex_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end, - (RegRegion* )pmatch, options); + r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end, + (OnigRegion* )pmatch, options); if (r >= 0) { r = 0; /* Match */ } - else if (r == REG_MISMATCH) { + else if (r == ONIG_MISMATCH) { r = REG_NOMATCH; for (i = 0; i < nmatch; i++) - pmatch[i].rm_so = pmatch[i].rm_eo = REG_REGION_NOTPOS; + pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS; } else { r = onig2posix_error_code(r); @@ -170,26 +179,74 @@ regexec(regex_t* reg, const char* str, size_t nmatch, extern void regfree(regex_t* reg) { - regex_free(ONIG_C(reg)); + onig_free(ONIG_C(reg)); } extern void reg_set_encoding(int mb_code) { - RegDefaultCharEncoding = REG_MBLEN_TABLE[mb_code]; + OnigEncoding enc; + + switch (mb_code) { + case REG_POSIX_ENCODING_ASCII: + enc = ONIG_ENCODING_ASCII; + break; + case REG_POSIX_ENCODING_EUC_JP: + enc = ONIG_ENCODING_EUC_JP; + break; + case REG_POSIX_ENCODING_SJIS: + enc = ONIG_ENCODING_SJIS; + break; + case REG_POSIX_ENCODING_UTF8: + enc = ONIG_ENCODING_UTF8; + break; + default: + return ; + break; + } + + onigenc_set_default_encoding(enc); } extern int reg_name_to_group_numbers(regex_t* reg, unsigned 
char* name, unsigned char* name_end, int** nums) { - return regex_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); + return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); +} + +typedef struct { + int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*); + regex_t* reg; + void* arg; +} i_wrap; + +static int i_wrapper(unsigned char* name, unsigned char* name_end, + int ng, int* gs, + onig_regex_t* reg, void* arg) +{ + i_wrap* warg = (i_wrap* )arg; + + return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg); +} + +extern int +reg_foreach_name(regex_t* reg, + int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*), + void* arg) +{ + i_wrap warg; + + warg.func = func; + warg.reg = reg; + warg.arg = arg; + + return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg); } extern int -reg_foreach_name(regex_t* reg, int (*func)(unsigned char*,int,int*,void*), - void* arg) +reg_number_of_names(regex_t* reg) { - return regex_foreach_name(ONIG_C(reg), func, arg); + return onig_number_of_names(ONIG_C(reg)); } diff --git a/ext/mbstring/oniguruma/sample/names.c b/ext/mbstring/oniguruma/sample/names.c deleted file mode 100644 index 1ebc4e856c..0000000000 --- a/ext/mbstring/oniguruma/sample/names.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * names.c -- example of group name callback. 
- */ -#include -#include "oniguruma.h" - -static int -name_callback(UChar* name, int ngroup_num, int* group_nums, void* arg) -{ - int i, gn; - RegRegion *region = (RegRegion* )arg; - - for (i = 0; i < ngroup_num; i++) { - gn = group_nums[i]; - fprintf(stderr, "%s (%d): ", name, gn); - fprintf(stderr, "(%d-%d)\n", region->beg[gn], region->end[gn]); - } - return 0; /* 0: continue */ -} - -extern int main(int argc, char* argv[]) -{ - int r; - unsigned char *start, *range, *end; - regex_t* reg; - RegErrorInfo einfo; - RegRegion *region; - - static unsigned char* pattern = "(?a*)(?b*)(?c*)"; - static unsigned char* str = "aaabbbbcc"; - - r = regex_new(®, pattern, pattern + strlen(pattern), - REG_OPTION_DEFAULT, REGCODE_ASCII, REG_SYNTAX_DEFAULT, &einfo); - if (r != REG_NORMAL) { - char s[REG_MAX_ERROR_MESSAGE_LEN]; - regex_error_code_to_str(s, r, &einfo); - fprintf(stderr, "ERROR: %s\n", s); - exit(-1); - } - - region = regex_region_new(); - - end = str + strlen(str); - start = str; - range = end; - r = regex_search(reg, str, end, start, range, region, REG_OPTION_NONE); - if (r >= 0) { - fprintf(stderr, "match at %d\n\n", r); - r = regex_foreach_name(reg, name_callback, (void* )region); - } - else if (r == REG_MISMATCH) { - fprintf(stderr, "search fail\n"); - } - else { /* error */ - char s[REG_MAX_ERROR_MESSAGE_LEN]; - regex_error_code_to_str(s, r); - exit(-1); - } - - regex_region_free(region, 1 /* 1:free self, 0:free contents only */); - regex_free(reg); - regex_end(); - return 0; -} diff --git a/ext/mbstring/oniguruma/sample/posix.c b/ext/mbstring/oniguruma/sample/posix.c deleted file mode 100644 index ff20292cb0..0000000000 --- a/ext/mbstring/oniguruma/sample/posix.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * posix.c - */ -#include -#include "onigposix.h" - -static int x(regex_t* reg, unsigned char* pattern, unsigned char* str) -{ - int r, i; - char buf[200]; - regmatch_t pmatch[20]; - - r = regexec(reg, str, reg->re_nsub + 1, pmatch, 0); - if (r != 0 && r != 
REG_NOMATCH) { - regerror(r, reg, buf, sizeof(buf)); - fprintf(stderr, "ERROR: %s\n", buf); - exit(-1); - } - - if (r == REG_NOMATCH) { - fprintf(stderr, "FAIL: /%s/ '%s'\n", pattern, str); - } - else { - fprintf(stderr, "OK: /%s/ '%s'\n", pattern, str); - for (i = 0; i <= reg->re_nsub; i++) { - fprintf(stderr, "%d: %d-%d\n", i, pmatch[i].rm_so, pmatch[i].rm_eo); - } - } - return 0; -} - -extern int main(int argc, char* argv[]) -{ - int r; - char buf[200]; - regex_t reg; - unsigned char* pattern; - - /* default syntax (REG_SYNTAX_RUBY) */ - pattern = "^a+b{2,7}[c-f]?$|uuu"; - r = regcomp(®, pattern, REG_EXTENDED); - if (r) { - regerror(r, ®, buf, sizeof(buf)); - fprintf(stderr, "ERROR: %s\n", buf); - exit(-1); - } - x(®, pattern, "aaabbbbd"); - - /* POSIX Basic RE (REG_EXTENDED is not specified.) */ - pattern = "^a+b{2,7}[c-f]?|uuu"; - r = regcomp(®, pattern, 0); - if (r) { - regerror(r, ®, buf, sizeof(buf)); - fprintf(stderr, "ERROR: %s\n", buf); - exit(-1); - } - x(®, pattern, "a+b{2,7}d?|uuu"); - - /* POSIX Basic RE (REG_EXTENDED is not specified.) 
*/ - pattern = "^a*b\\{2,7\\}\\([c-f]\\)$"; - r = regcomp(®, pattern, 0); - if (r) { - regerror(r, ®, buf, sizeof(buf)); - fprintf(stderr, "ERROR: %s\n", buf); - exit(-1); - } - x(®, pattern, "aaaabbbbbbd"); - - /* POSIX Extended RE */ - regex_set_default_syntax(REG_SYNTAX_POSIX_EXTENDED); - pattern = "^a+b{2,7}[c-f]?)$|uuu"; - r = regcomp(®, pattern, REG_EXTENDED); - if (r) { - regerror(r, ®, buf, sizeof(buf)); - fprintf(stderr, "ERROR: %s\n", buf); - exit(-1); - } - x(®, pattern, "aaabbbbd)"); - - pattern = "^b."; - r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); - if (r) { - regerror(r, ®, buf, sizeof(buf)); - fprintf(stderr, "ERROR: %s\n", buf); - exit(-1); - } - x(®, pattern, "a\nb\n"); - - regfree(®); - regex_end(); - return 0; -} diff --git a/ext/mbstring/oniguruma/sample/simple.c b/ext/mbstring/oniguruma/sample/simple.c deleted file mode 100644 index 89498bac11..0000000000 --- a/ext/mbstring/oniguruma/sample/simple.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * simple.c - */ -#include -#include "oniguruma.h" - -extern int main(int argc, char* argv[]) -{ - int r; - unsigned char *start, *range, *end; - regex_t* reg; - RegErrorInfo einfo; - RegRegion *region; - - static unsigned char* pattern = "a(.*)b|[e-f]+"; - static unsigned char* str = "zzzzaffffffffb"; - - r = regex_new(®, pattern, pattern + strlen(pattern), - REG_OPTION_DEFAULT, REGCODE_ASCII, REG_SYNTAX_DEFAULT, &einfo); - if (r != REG_NORMAL) { - char s[REG_MAX_ERROR_MESSAGE_LEN]; - regex_error_code_to_str(s, r, &einfo); - fprintf(stderr, "ERROR: %s\n", s); - exit(-1); - } - - region = regex_region_new(); - - end = str + strlen(str); - start = str; - range = end; - r = regex_search(reg, str, end, start, range, region, REG_OPTION_NONE); - if (r >= 0) { - int i; - - fprintf(stderr, "match at %d\n", r); - for (i = 0; i < region->num_regs; i++) { - fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); - } - } - else if (r == REG_MISMATCH) { - fprintf(stderr, "search fail\n"); - } - else { 
/* error */ - char s[REG_MAX_ERROR_MESSAGE_LEN]; - regex_error_code_to_str(s, r); - exit(-1); - } - - regex_region_free(region, 1 /* 1:free self, 0:free contents only */); - regex_free(reg); - regex_end(); - return 0; -} diff --git a/ext/mbstring/oniguruma/test.rb b/ext/mbstring/oniguruma/test.rb deleted file mode 100644 index 2c69344407..0000000000 --- a/ext/mbstring/oniguruma/test.rb +++ /dev/null @@ -1,971 +0,0 @@ -# test.rb -# Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp) - -def pr(result, reg, str, n = 0, *range) - printf("%s /%s/:'%s'", result, reg.source, str) - if (n.class == Fixnum) - printf(":%d", n) if n != 0 - if (range.size > 0) - if (range[3].nil?) - printf(" (%d-%d : X-X)", range[0], range[1]) - else - printf(" (%d-%d : %d-%d)", range[0], range[1], range[2], range[3]) - end - end - else - printf(" %s", n) - end - printf("\n") -end - -def rok(result_opt, reg, str, n = 0, *range) - result = "OK" + result_opt - result += " " * (7 - result.length) - pr(result, reg, str, n, *range) - $rok += 1 -end - -def rfail(result_opt, reg, str, n = 0, *range) - result = "FAIL" + result_opt - result += " " * (7 - result.length) - pr(result, reg, str, n, *range) - $rfail += 1 -end - -def x(reg, str, s, e, n = 0) - m = reg.match(str) - if m - if (m.size() <= n) - rfail("(%d)" % (m.size()-1), reg, str, n) - else - if (m.begin(n) == s && m.end(n) == e) - rok("", reg, str, n) - else - rfail("", reg, str, n, s, e, m.begin(n), m.end(n)) - end - end - else - rfail("", reg, str, n) - end -end - -def n(reg, str) - m = reg.match(str) - if m - rfail("(N)", reg, str, 0) - else - rok("(N)", reg, str, 0) - end -end - -def r(reg, str, index, pos = nil) - if (pos) - res = str.rindex(reg, pos) - else - res = str.rindex(reg) - end - if res - if (res == index) - rok("(r)", reg, str) - else - rfail("(r)", reg, str, [res, '-', index]) - end - else - rfail("(r)", reg, str) - end -end - -def i(reg, str, s = 0, e = 0, n = 0) - # ignore -end - -### main ### -$rok = $rfail = 0 - - -def 
test_sb(enc) -$KCODE = enc - - -x(//, '', 0, 0) -x(/^/, '', 0, 0) -x(/$/, '', 0, 0) -x(/\G/, '', 0, 0) -x(/\A/, '', 0, 0) -x(/\Z/, '', 0, 0) -x(/\z/, '', 0, 0) -x(/^$/, '', 0, 0) -x(/\ca/, "\001", 0, 1) -x(/\C-b/, "\002", 0, 1) -x(/\M-Z/, "\xDA", 0, 1) -x(//, 'a', 0, 0) -x(/a/, 'a', 0, 1) -x(/aa/, 'aa', 0, 2) -x(/aaa/, 'aaa', 0, 3) -x(/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', 0, 35) -x(/ab/, 'ab', 0, 2) -x(/b/, 'ab', 1, 2) -x(/bc/, 'abc', 1, 3) -x(/\17/, "\017", 0, 1) -x(/\x1f/, "\x1f", 0, 1) -x(/\xFE/, "\xfe", 0, 1) -x(/a(?#....\\JJJJ)b/, 'ab', 0, 2) -x(/./, 'a', 0, 1) -n(/./, '') -x(/../, 'ab', 0, 2) -x(/\w/, 'e', 0, 1) -n(/\W/, 'e') -x(/\s/, ' ', 0, 1) -x(/\S/, 'b', 0, 1) -x(/\d/, '4', 0, 1) -n(/\D/, '4') -x(/\b/, 'z ', 0, 0) -x(/\b/, ' z', 1, 1) -x(/\B/, 'zz ', 1, 1) -x(/\B/, 'z ', 2, 2) -x(/\B/, ' z', 0, 0) -x(/[ab]/, 'b', 0, 1) -n(/[ab]/, 'c') -x(/[a-z]/, 't', 0, 1) -n(/[^a]/, 'a') -x(/[^a]/, "\n", 0, 1) -x(/[]]/, ']', 0, 1) -n(/[^]]/, ']') -x(/[b-]/, 'b', 0, 1) -x(/[b-]/, '-', 0, 1) -x(/[\w]/, 'z', 0, 1) -n(/[\w]/, ' ') -x(/[\d]/, '5', 0, 1) -n(/[\d]/, 'e') -x(/[\D]/, 't', 0, 1) -n(/[\D]/, '3') -x(/[\s]/, ' ', 0, 1) -n(/[\s]/, 'a') -x(/[\S]/, 'b', 0, 1) -n(/[\S]/, ' ') -x(/[\w\d]/, '2', 0, 1) -n(/[\w\d]/, ' ') -x(/[[:upper:]]/, 'B', 0, 1) -x(/[*[:xdigit:]+]/, '+', 0, 1) -x(/[*[:xdigit:]+]/, 'GHIKK-9+*', 6, 7) -x(/[*[:xdigit:]+]/, '-@^+', 3, 4) -n(/[[:upper]]/, 'A') -x(/[[:upper]]/, ':', 0, 1) -x(/[\044-\047]/, "\046", 0, 1) -x(/[\x5a-\x5c]/, "\x5b", 0, 1) -x(/[\x6A-\x6D]/, "\x6c", 0, 1) -n(/[\x6A-\x6D]/, "\x6E") -n(/^[0-9A-F]+ 0+ UNDEF /, '75F 00000000 SECT14A notype () External | _rb_apply') -x(/[\[]/, '[', 0, 1) -x(/[\]]/, ']', 0, 1) -x(/[&]/, '&', 0, 1) -x(/[[ab]]/, 'b', 0, 1) -x(/[[ab]c]/, 'c', 0, 1) -n(/[[^a]]/, 'a') -n(/[^[a]]/, 'a') -x(/[[ab]&&bc]/, 'b', 0, 1) -n(/[[ab]&&bc]/, 'a') -n(/[[ab]&&bc]/, 'c') -x(/[a-z&&b-y&&c-x]/, 'w', 0, 1) -n(/[^a-z&&b-y&&c-x]/, 'w') -x(/[[^a&&a]&&a-z]/, 'b', 0, 1) -n(/[[^a&&a]&&a-z]/, 
'a') -x(/[[^a-z&&bcdef]&&[^c-g]]/, 'h', 0, 1) -n(/[[^a-z&&bcdef]&&[^c-g]]/, 'c') -x(/[^[^abc]&&[^cde]]/, 'c', 0, 1) -x(/[^[^abc]&&[^cde]]/, 'e', 0, 1) -n(/[^[^abc]&&[^cde]]/, 'f') -x(/[a-&&-a]/, '-', 0, 1) -n(/[a-&&-a]/, '&') -n(/\wabc/, ' abc') -x(/a\Wbc/, 'a bc', 0, 4) -x(/a.b.c/, 'aabbc', 0, 5) -x(/.\wb\W..c/, 'abb bcc', 0, 7) -x(/\s\wzzz/, ' zzzz', 0, 5) -x(/aa.b/, 'aabb', 0, 4) -n(/.a/, 'ab') -x(/.a/, 'aa', 0, 2) -x(/^a/, 'a', 0, 1) -x(/^a$/, 'a', 0, 1) -x(/^\w$/, 'a', 0, 1) -n(/^\w$/, ' ') -x(/^\wab$/, 'zab', 0, 3) -x(/^\wabcdef$/, 'zabcdef', 0, 7) -x(/^\w...def$/, 'zabcdef', 0, 7) -x(/\w\w\s\Waaa\d/, 'aa aaa4', 0, 8) -x(/\A\Z/, '', 0, 0) -x(/\Axyz/, 'xyz', 0, 3) -x(/xyz\Z/, 'xyz', 0, 3) -x(/xyz\z/, 'xyz', 0, 3) -x(/\Gaz/, 'az', 0, 2) -n(/\Gz/, 'bza') -n(/az\G/, 'az') -n(/az\A/, 'az') -n(/a\Az/, 'az') -x(/\^\$/, '^$', 0, 2) -x(/\w/, '_', 0, 1) -n(/\W/, '_') -x(/(?=z)z/, 'z', 0, 1) -n(/(?=z)./, 'a') -x(/(?!z)a/, 'a', 0, 1) -n(/(?!z)a/, 'z') -x(/(?i:a)/, 'a', 0, 1) -x(/(?i:a)/, 'A', 0, 1) -x(/(?i:A)/, 'a', 0, 1) -n(/(?i:A)/, 'b') -x(/(?i:[A-Z])/, 'a', 0, 1) -x(/(?i:[f-m])/, 'H', 0, 1) -x(/(?i:[f-m])/, 'h', 0, 1) -n(/(?i:[f-m])/, 'e') -n(/(?i:[A-c])/, 'D') # changed spec. 2003/02/07 -n(/(?i:[a-C])/, 'D') # changed spec. 
2003/02/07 -n(/(?i:[b-C])/, 'A') -x(/(?i:[a-C])/, 'B', 0, 1) -n(/(?i:[c-X])/, '[') -n(/(?i:[!-k])/, 'Z') -x(/(?i:[!-k])/, '7', 0, 1) -n(/(?i:[T-}])/, 'b') -x(/(?i:[T-}])/, '{', 0, 1) -x(/(?i:\?a)/, '?A', 0, 2) -x(/(?i:\*A)/, '*a', 0, 2) -n(/./, "\n") -x(/(?m:.)/, "\n", 0, 1) -x(/(?m:a.)/, "a\n", 0, 2) -x(/(?m:.b)/, "a\nb", 1, 3) -x(/a?/, '', 0, 0) -x(/a?/, 'b', 0, 0) -x(/a?/, 'a', 0, 1) -x(/a*/, '', 0, 0) -x(/a*/, 'a', 0, 1) -x(/a*/, 'aaa', 0, 3) -x(/a*/, 'baaaa', 0, 0) -n(/a+/, '') -x(/a+/, 'a', 0, 1) -x(/a+/, 'aaaa', 0, 4) -x(/a+/, 'aabbb', 0, 2) -x(/a+/, 'baaaa', 1, 5) -x(/.?/, '', 0, 0) -x(/.?/, 'f', 0, 1) -x(/.?/, "\n", 0, 0) -x(/.*/, '', 0, 0) -x(/.*/, 'abcde', 0, 5) -x(/.+/, 'z', 0, 1) -x(/.+/, "zdswer\n", 0, 6) -x(/a|b/, 'a', 0, 1) -x(/a|b/, 'b', 0, 1) -x(/|a/, 'a', 0, 0) -x(/(|a)/, 'a', 0, 0) -x(/ab|bc/, 'ab', 0, 2) -x(/ab|bc/, 'bc', 0, 2) -x(/z(?:ab|bc)/, 'zbc', 0, 3) -x(/a(?:ab|bc)c/, 'aabc', 0, 4) -x(/ab|(?:ac|az)/, 'az', 0, 2) -x(/a|b|c/, 'dc', 1, 2) -x(/a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz/, 'pqr', 0, 2) -n(/a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz/, 'mn') -x(/a|^z/, 'ba', 1, 2) -x(/a|^z/, 'za', 0, 1) -x(/a|\Gz/, 'bza', 2, 3) -x(/a|\Gz/, 'za', 0, 1) -x(/a|\Az/, 'bza', 2, 3) -x(/a|\Az/, 'za', 0, 1) -x(/a|b\Z/, 'ba', 1, 2) -x(/a|b\Z/, 'b', 0, 1) -x(/a|b\z/, 'ba', 1, 2) -x(/a|b\z/, 'b', 0, 1) -x(/\w|\s/, ' ', 0, 1) -n(/\w|\w/, ' ') -x(/\w|%/, '%', 0, 1) -x(/\w|[&$]/, '&', 0, 1) -x(/[b-d]|[^e-z]/, 'a', 0, 1) -x(/(?:a|[c-f])|bz/, 'dz', 0, 1) -x(/(?:a|[c-f])|bz/, 'bz', 0, 2) -x(/abc|(?=zz)..f/, 'zzf', 0, 3) -x(/abc|(?!zz)..f/, 'abf', 0, 3) -x(/(?=za)..a|(?=zz)..a/, 'zza', 0, 3) -n(/(?>a|abd)c/, 'abdc') -x(/(?>abd|a)c/, 'abdc', 0, 4) -x(/a?|b/, 'a', 0, 1) -x(/a?|b/, 'b', 0, 0) -x(/a?|b/, '', 0, 0) -x(/a*|b/, 'aa', 0, 2) -x(/a*|b*/, 'ba', 0, 0) -x(/a*|b*/, 'ab', 0, 1) -x(/a+|b*/, '', 0, 0) -x(/a+|b*/, 'bbb', 0, 3) -x(/a+|b*/, 'abbb', 0, 1) -n(/a+|b+/, '') -x(/(a|b)?/, 'b', 0, 1) -x(/(a|b)*/, 'ba', 0, 2) -x(/(a|b)+/, 'bab', 0, 3) -x(/(ab|ca)+/, 'caabbc', 0, 4) 
-x(/(ab|ca)+/, 'aabca', 1, 5) -x(/(ab|ca)+/, 'abzca', 0, 2) -x(/(a|bab)+/, 'ababa', 0, 5) -x(/(a|bab)+/, 'ba', 1, 2) -x(/(a|bab)+/, 'baaaba', 1, 4) -x(/(?:a|b)(?:a|b)/, 'ab', 0, 2) -x(/(?:a*|b*)(?:a*|b*)/, 'aaabbb', 0, 3) -x(/(?:a*|b*)(?:a+|b+)/, 'aaabbb', 0, 6) -x(/(?:a+|b+){2}/, 'aaabbb', 0, 6) -x(/h{0,}/, 'hhhh', 0, 4) -x(/(?:a+|b+){1,2}/, 'aaabbb', 0, 6) -x(/(?:a+|\Ab*)cc/, 'cc', 0, 2) -n(/(?:a+|\Ab*)cc/, 'abcc') -x(/(?:^a+|b+)*c/, 'aabbbabc', 6, 8) -x(/(?:^a+|b+)*c/, 'aabbbbc', 0, 7) -x(/a|(?i)c/, 'C', 0, 1) -x(/(?i)c|a/, 'C', 0, 1) -i(/(?i)c|a/, 'A', 0, 1) # different spec. -x(/(?i:c)|a/, 'C', 0, 1) -n(/(?i:c)|a/, 'A') -x(/[abc]?/, 'abc', 0, 1) -x(/[abc]*/, 'abc', 0, 3) -x(/[^abc]*/, 'abc', 0, 0) -n(/[^abc]+/, 'abc') -x(/a??/, 'aaa', 0, 0) -x(/ba??b/, 'bab', 0, 3) -x(/a*?/, 'aaa', 0, 0) -x(/ba*?/, 'baa', 0, 1) -x(/ba*?b/, 'baab', 0, 4) -x(/a+?/, 'aaa', 0, 1) -x(/ba+?/, 'baa', 0, 2) -x(/ba+?b/, 'baab', 0, 4) -x(/(?:a?)??/, 'a', 0, 0) -x(/(?:a??)?/, 'a', 0, 0) -x(/(?:a?)+?/, 'aaa', 0, 1) -x(/(?:a+)??/, 'aaa', 0, 0) -x(/(?:a+)??b/, 'aaab', 0, 4) -i(/(?:ab)?{2}/, '', 0, 0) # GNU regex bug -x(/(?:ab)?{2}/, 'ababa', 0, 4) -x(/(?:ab)*{0}/, 'ababa', 0, 0) -x(/(?:ab){3,}/, 'abababab', 0, 8) -n(/(?:ab){3,}/, 'abab') -x(/(?:ab){2,4}/, 'ababab', 0, 6) -x(/(?:ab){2,4}/, 'ababababab', 0, 8) -x(/(?:ab){2,4}?/, 'ababababab', 0, 4) -x(/(?:ab){,}/, 'ab{,}', 0, 5) -x(/(?:abc)+?{2}/, 'abcabcabc', 0, 6) -x(/(?:X*)(?i:xa)/, 'XXXa', 0, 4) -x(/(d+)([^abc]z)/, 'dddz', 0, 4) -x(/([^abc]*)([^abc]z)/, 'dddz', 0, 4) -x(/(\w+)(\wz)/, 'dddz', 0, 4) -x(/(a)/, 'a', 0, 1, 1) -x(/(ab)/, 'ab', 0, 2, 1) -x(/((ab))/, 'ab', 0, 2) -x(/((ab))/, 'ab', 0, 2, 1) -x(/((ab))/, 'ab', 0, 2, 2) -x(/((((((((((((((((((((ab))))))))))))))))))))/, 'ab', 0, 2, 20) -x(/(ab)(cd)/, 'abcd', 0, 2, 1) -x(/(ab)(cd)/, 'abcd', 2, 4, 2) -x(/()(a)bc(def)ghijk/, 'abcdefghijk', 3, 6, 3) -x(/(()(a)bc(def)ghijk)/, 'abcdefghijk', 3, 6, 4) -x(/(^a)/, 'a', 0, 1) -x(/(a)|(a)/, 'ba', 1, 2, 1) -x(/(^a)|(a)/, 'ba', 1, 2, 2) -x(/(a?)/, 
'aaa', 0, 1, 1) -x(/(a*)/, 'aaa', 0, 3, 1) -x(/(a*)/, '', 0, 0, 1) -x(/(a+)/, 'aaaaaaa', 0, 7, 1) -x(/(a+|b*)/, 'bbbaa', 0, 3, 1) -x(/(a+|b?)/, 'bbbaa', 0, 1, 1) -x(/(abc)?/, 'abc', 0, 3, 1) -x(/(abc)*/, 'abc', 0, 3, 1) -x(/(abc)+/, 'abc', 0, 3, 1) -x(/(xyz|abc)+/, 'abc', 0, 3, 1) -x(/([xyz][abc]|abc)+/, 'abc', 0, 3, 1) -x(/((?i:abc))/, 'AbC', 0, 3, 1) -x(/(abc)(?i:\1)/, 'abcABC', 0, 6) -x(/((?m:a.c))/, "a\nc", 0, 3, 1) -x(/((?=az)a)/, 'azb', 0, 1, 1) -x(/abc|(.abd)/, 'zabd', 0, 4, 1) -x(/(?:abc)|(ABC)/, 'abc', 0, 3) -x(/(?i:(abc))|(zzz)/, 'ABC', 0, 3, 1) -x(/a*(.)/, 'aaaaz', 4, 5, 1) -x(/a*?(.)/, 'aaaaz', 0, 1, 1) -x(/a*?(c)/, 'aaaac', 4, 5, 1) -x(/[bcd]a*(.)/, 'caaaaz', 5, 6, 1) -x(/(\Abb)cc/, 'bbcc', 0, 2, 1) -n(/(\Abb)cc/, 'zbbcc') -x(/(^bb)cc/, 'bbcc', 0, 2, 1) -n(/(^bb)cc/, 'zbbcc') -x(/cc(bb$)/, 'ccbb', 2, 4, 1) -n(/cc(bb$)/, 'ccbbb') -#n(/\1/, 'a') # compile error on Oniguruma -n(/(\1)/, '') -n(/\1(a)/, 'aa') -n(/(a(b)\1)\2+/, 'ababb') -n(/(?:(?:\1|z)(a))+$/, 'zaa') -x(/(?:(?:\1|z)(a))+$/, 'zaaa', 0, 4) -x(/(a)(?=\1)/, 'aa', 0, 1) -n(/(a)$|\1/, 'az') -x(/(a)\1/, 'aa', 0, 2) -n(/(a)\1/, 'ab') -x(/(a?)\1/, 'aa', 0, 2) -x(/(a??)\1/, 'aa', 0, 0) -x(/(a*)\1/, 'aaaaa', 0, 4) -x(/(a*)\1/, 'aaaaa', 0, 2, 1) -x(/a(b*)\1/, 'abbbb', 0, 5) -x(/a(b*)\1/, 'ab', 0, 1) -x(/(a*)(b*)\1\2/, 'aaabbaaabb', 0, 10) -x(/(a*)(b*)\2/, 'aaabbbb', 0, 7) -x(/(((((((a*)b))))))c\7/, 'aaabcaaa', 0, 8) -x(/(((((((a*)b))))))c\7/, 'aaabcaaa', 0, 3, 7) -x(/(a)(b)(c)\2\1\3/, 'abcbac', 0, 6) -x(/([a-d])\1/, 'cc', 0, 2) -x(/(\w\d\s)\1/, 'f5 f5 ', 0, 6) -n(/(\w\d\s)\1/, 'f5 f5') -x(/(who|[a-c]{3})\1/, 'whowho', 0, 6) -x(/...(who|[a-c]{3})\1/, 'abcwhowho', 0, 9) -x(/(who|[a-c]{3})\1/, 'cbccbc', 0, 6) -x(/(^a)\1/, 'aa', 0, 2) -n(/(^a)\1/, 'baa') -n(/(a$)\1/, 'aa') -n(/(ab\Z)\1/, 'ab') -x(/(a*\Z)\1/, 'a', 1, 1) -x(/.(a*\Z)\1/, 'ba', 1, 2) -x(/(.(abc)\2)/, 'zabcabc', 0, 7, 1) -x(/(.(..\d.)\2)/, 'z12341234', 0, 9, 1) -x(/((?i:az))\1/, 'AzAz', 0, 4) -n(/((?i:az))\1/, 'Azaz') -x(/(?<=a)b/, 'ab', 1, 2) 
-n(/(?<=a)b/, 'bb') -x(/(?<=a|b)b/, 'bb', 1, 2) -x(/(?<=a|bc)b/, 'bcb', 2, 3) -x(/(?<=a|bc)b/, 'ab', 1, 2) -x(/(?<=a|bc||defghij|klmnopq|r)z/, 'rz', 1, 2) -x(/(?a)/, 'a', 0, 1) -x(/(?ab)\1/, 'abab', 0, 4) -x(/(?.zv.)\k/, 'azvbazvb', 0, 8) -x(/(?<=\g)|-\zEND (?XyZ)/, 'XyZ', 3, 3) -x(/(?|a\g)+/, '', 0, 0) -x(/(?|\(\g\))+$/, '()(())', 0, 6) -x(/\g(?.){0}/, 'X', 0, 1, 1) -x(/\g(abc|df(?.YZ){2,8}){0}/, 'XYZ', 0, 3) -x(/\A(?(a\g)|)\z/, 'aaaa', 0, 4) -x(/(?|\g\g)\z|\zEND (?a|(b)\g)/, 'bbbbabba', 0, 8) -x(/(?<@:name[1240]>\w+\sx)a+\k<@:name[1240]>/, ' fg xaaaaaaaafg x', 2, 18) -x(/(z)()()(?<9>a)\4/, 'zaa', 1, 2, 4) -x(/(.)(((?<*>a)))\k<*>/, 'zaa', 0, 3) -x(/((?\d)|(?\w))(\k|\k)/, 'ff', 0, 2) -x(/(?:(?)|(?efg))\k/, '', 0, 0) -x(/(?:(?<@x>abc)|(?<@x>efg))\k<@x>/, 'abcefgefg', 3, 9) -n(/(?:(?<@x>abc)|(?<@x>efg))\k<@x>/, 'abcefg') -x(/(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\k$/, 'a-pyumpyum', 2, 10) -x(/(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\k$/, 'xxxxabcdefghijklmnabcdefghijklmn', 4, 18, 14) -x(/(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$/, 'aaa', 0, 3, 16) -x(/(?a|\(\g\))/, 'a', 0, 1) -x(/(?a|\(\g\))/, '((((((a))))))', 0, 13) -x(/(?a|\(\g\))/, '((((((((a))))))))', 0, 17, 1) -x(/\g|\zEND(?.*abc$)/, 'abcxxxabc', 0, 9) -x(/\g<1>|\zEND(.a.)/, 'bac', 0, 3) -x(/\g<2>\g<1>|\zEND(.a.)(?.b.)/, 'xbxyay', 3, 6, 1) -x(/\A(?:\g|\g|\zEND (?a|c\gc)(?b|d\gd))$/, 'cdcbcdc', 0, 7) -x(/\A(?|a\g)\z|\zEND (?\g)/, 'aaaa', 0, 4) -x(/(?(a|b\gc){3,5})/, 'baaaaca', 1, 5) -x(/(?(a|b\gc){3,5})/, 'baaaacaaaaa', 0, 10) - -r(//, '', 0) -r(/a/, 'a', 0) -r(/a/, 'a', 0, 1) -r(/b/, 'abc', 1) -r(/b/, 'abc', 1, 2) -r(/./, 'a', 0) -r(/.*/, 'abcde fgh', 9) -r(/a*/, 'aaabbc', 6) -r(/a+/, 'aaabbc', 2) -r(/a?/, 'bac', 3) -r(/a??/, 'bac', 3) -r(/abcde/, 
'abcdeavcd', 0) -r(/\w\d\s/, ' a2 aa $3 ', 2) -r(/[c-f]aa[x-z]/, '3caaycaaa', 1) -r(/(?i:fG)g/, 'fGgFggFgG', 3) -r(/a|b/, 'b', 0) -r(/ab|bc|cd/, 'bcc', 0) -r(/(ffy)\1/, 'ffyffyffy', 3) -r(/|z/, 'z', 1) -r(/^az/, 'azaz', 0) -r(/az$/, 'azaz', 2) -r(/(((.a)))\3/, 'zazaaa', 0) -r(/(ac*?z)\1/, 'aacczacczacz', 1) -r(/aaz{3,4}/, 'bbaabbaazzzaazz', 6) -r(/\000a/, "b\000a", 1) -r(/ff\xfe/, "fff\xfe", 1) -r(/...abcdefghijklmnopqrstuvwxyz/, 'zzzzzabcdefghijklmnopqrstuvwxyz', 2) -end - -def test_euc(enc) -$KCODE = enc - -x(//, '¤¢', 0, 0) -x(/¤¢/, '¤¢', 0, 2) -n(/¤¤/, '¤¢') -x(/¤¦¤¦/, '¤¦¤¦', 0, 4) -x(/¤¢¤¤¤¦/, '¤¢¤¤¤¦', 0, 6) -x(/¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³/, '¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³', 0, 70) -x(/¤¢/, '¤¤¤¢', 2, 4) -x(/¤¤¤¦/, '¤¢¤¤¤¦', 2, 6) -x(/\xca\xb8/, "\xca\xb8", 0, 2) -x(/./, '¤¢', 0, 2) -x(/../, '¤«¤­', 0, 4) -x(/\w/, '¤ª', 0, 2) -n(/\W/, '¤¢') -x(/\S/, '¤½', 0, 2) -x(/\S/, '´Á', 0, 2) -x(/\b/, 'µ¤ ', 0, 0) -x(/\b/, ' ¤Û', 1, 1) -x(/\B/, '¤»¤½ ', 2, 2) -x(/\B/, '¤¦ ', 3, 3) -x(/\B/, ' ¤¤', 0, 0) -x(/[¤¿¤Á]/, '¤Á', 0, 2) -n(/[¤Ê¤Ë]/, '¤Ì') -x(/[¤¦-¤ª]/, '¤¨', 0, 2) -n(/[^¤±]/, '¤±') -x(/[\w]/, '¤Í', 0, 2) -n(/[\d]/, '¤Õ') -x(/[\D]/, '¤Ï', 0, 2) -n(/[\s]/, '¤¯') -x(/[\S]/, '¤Ø', 0, 2) -x(/[\w\d]/, '¤è', 0, 2) -x(/[\w\d]/, ' ¤è', 3, 5) -#x(/[\xa4\xcf-\xa4\xd3]/, "\xa4\xd0", 0, 2) # diff spec with GNU regex. -#n(/[\xb6\xe7-\xb6\xef]/, "\xb6\xe5") # diff spec with GNU regex. 
-n(/\wµ´¼Ö/, ' µ´¼Ö') -x(/µ´\W¼Ö/, 'µ´ ¼Ö', 0, 5) -x(/¤¢.¤¤.¤¦/, '¤¢¤¢¤¤¤¤¤¦', 0, 10) -x(/.\w¤¦\W..¤¾/, '¤¨¤¦¤¦ ¤¦¤¾¤¾', 0, 13) -x(/\s\w¤³¤³¤³/, ' ¤³¤³¤³¤³', 0, 9) -x(/¤¢¤¢.¤±/, '¤¢¤¢¤±¤±', 0, 8) -n(/.¤¤/, '¤¤¤¨') -x(/.¤ª/, '¤ª¤ª', 0, 4) -x(/^¤¢/, '¤¢', 0, 2) -x(/^¤à$/, '¤à', 0, 2) -x(/^\w$/, '¤Ë', 0, 2) -x(/^\w¤«¤­¤¯¤±¤³$/, 'z¤«¤­¤¯¤±¤³', 0, 11) -x(/^\w...¤¦¤¨¤ª$/, 'z¤¢¤¤¤¦¤¦¤¨¤ª', 0, 13) -x(/\w\w\s\W¤ª¤ª¤ª\d/, 'a¤ª ¤ª¤ª¤ª4', 0, 12) -x(/\A¤¿¤Á¤Ä/, '¤¿¤Á¤Ä', 0, 6) -x(/¤à¤á¤â\Z/, '¤à¤á¤â', 0, 6) -x(/¤«¤­¤¯\z/, '¤«¤­¤¯', 0, 6) -x(/¤«¤­¤¯\Z/, "¤«¤­¤¯\n", 0, 6) -x(/\G¤Ý¤Ô/, '¤Ý¤Ô', 0, 4) -n(/\G¤¨/, '¤¦¤¨¤ª') -n(/¤È¤Æ\G/, '¤È¤Æ') -n(/¤Þ¤ß\A/, '¤Þ¤ß') -n(/¤Þ\A¤ß/, '¤Þ¤ß') -x(/(?=¤»)¤»/, '¤»', 0, 2) -n(/(?=¤¦)./, '¤¤') -x(/(?!¤¦)¤«/, '¤«', 0, 2) -n(/(?!¤È)¤¢/, '¤È') -x(/(?i:¤¢)/, '¤¢', 0, 2) -x(/(?i:¤Ö¤Ù)/, '¤Ö¤Ù', 0, 4) -n(/(?i:¤¤)/, '¤¦') -x(/(?m:¤è.)/, "¤è\n", 0, 3) -x(/(?m:.¤á)/, "¤Þ\n¤á", 2, 5) -x(/¤¢?/, '', 0, 0) -x(/ÊÑ?/, '²½', 0, 0) -x(/ÊÑ?/, 'ÊÑ', 0, 2) -x(/ÎÌ*/, '', 0, 0) -x(/ÎÌ*/, 'ÎÌ', 0, 2) -x(/»Ò*/, '»Ò»Ò»Ò', 0, 6) -x(/ÇÏ*/, '¼¯ÇÏÇÏÇÏÇÏ', 0, 0) -n(/»³+/, '') -x(/²Ï+/, '²Ï', 0, 2) -x(/»þ+/, '»þ»þ»þ»þ', 0, 8) -x(/¤¨+/, '¤¨¤¨¤¦¤¦¤¦', 0, 4) -x(/¤¦+/, '¤ª¤¦¤¦¤¦¤¦', 2, 10) -x(/.?/, '¤¿', 0, 2) -x(/.*/, '¤Ñ¤Ô¤×¤Ú', 0, 8) -x(/.+/, '¤í', 0, 2) -x(/.+/, "¤¤¤¦¤¨¤«\n", 0, 8) -x(/¤¢|¤¤/, '¤¢', 0, 2) -x(/¤¢|¤¤/, '¤¤', 0, 2) -x(/¤¢¤¤|¤¤¤¦/, '¤¢¤¤', 0, 4) -x(/¤¢¤¤|¤¤¤¦/, '¤¤¤¦', 0, 4) -x(/¤ò(?:¤«¤­|¤­¤¯)/, '¤ò¤«¤­', 0, 6) -x(/¤ò(?:¤«¤­|¤­¤¯)¤±/, '¤ò¤­¤¯¤±', 0, 8) -x(/¤¢¤¤|(?:¤¢¤¦|¤¢¤ò)/, '¤¢¤ò', 0, 4) -x(/¤¢|¤¤|¤¦/, '¤¨¤¦', 2, 4) -x(/¤¢|¤¤|¤¦¤¨|¤ª¤«¤­|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í/, '¤·¤¹¤»', 0, 6) -n(/¤¢|¤¤|¤¦¤¨|¤ª¤«¤­|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í/, '¤¹¤»') -x(/¤¢|^¤ï/, '¤Ö¤¢', 2, 4) -x(/¤¢|^¤ò/, '¤ò¤¢', 0, 2) -x(/µ´|\G¼Ö/, '¤±¼Öµ´', 4, 6) -x(/µ´|\G¼Ö/, '¼Öµ´', 0, 2) -x(/µ´|\A¼Ö/, 'b¼Öµ´', 3, 5) -x(/µ´|\A¼Ö/, '¼Ö', 0, 2) -x(/µ´|¼Ö\Z/, '¼Öµ´', 2, 4) -x(/µ´|¼Ö\Z/, '¼Ö', 0, 2) -x(/µ´|¼Ö\Z/, "¼Ö\n", 0, 2) -x(/µ´|¼Ö\z/, '¼Öµ´', 2, 4) -x(/µ´|¼Ö\z/, '¼Ö', 
0, 2) -x(/\w|\s/, '¤ª', 0, 2) -x(/\w|%/, '%¤ª', 0, 1) -x(/\w|[&$]/, '¤¦&', 0, 2) -x(/[¤¤-¤±]/, '¤¦', 0, 2) -x(/[¤¤-¤±]|[^¤«-¤³]/, '¤¢', 0, 2) -x(/[¤¤-¤±]|[^¤«-¤³]/, '¤«', 0, 2) -x(/(?:¤¢|[¤¦-¤­])|¤¤¤ò/, '¤¦¤ò', 0, 2) -x(/(?:¤¢|[¤¦-¤­])|¤¤¤ò/, '¤¤¤ò', 0, 4) -x(/¤¢¤¤¤¦|(?=¤±¤±)..¤Û/, '¤±¤±¤Û', 0, 6) -x(/¤¢¤¤¤¦|(?!¤±¤±)..¤Û/, '¤¢¤¤¤Û', 0, 6) -x(/(?=¤ò¤¢)..¤¢|(?=¤ò¤ò)..¤¢/, '¤ò¤ò¤¢', 0, 6) -x(/(?<=¤¢|¤¤¤¦)¤¤/, '¤¤¤¦¤¤', 4, 6) -n(/(?>¤¢|¤¢¤¤¤¨)¤¦/, '¤¢¤¤¤¨¤¦') -x(/(?>¤¢¤¤¤¨|¤¢)¤¦/, '¤¢¤¤¤¨¤¦', 0, 8) -x(/¤¢?|¤¤/, '¤¢', 0, 2) -x(/¤¢?|¤¤/, '¤¤', 0, 0) -x(/¤¢?|¤¤/, '', 0, 0) -x(/¤¢*|¤¤/, '¤¢¤¢', 0, 4) -x(/¤¢*|¤¤*/, '¤¤¤¢', 0, 0) -x(/¤¢*|¤¤*/, '¤¢¤¤', 0, 2) -x(/[a¤¢]*|¤¤*/, 'a¤¢¤¤¤¤¤¤', 0, 3) -x(/¤¢+|¤¤*/, '', 0, 0) -x(/¤¢+|¤¤*/, '¤¤¤¤¤¤', 0, 6) -x(/¤¢+|¤¤*/, '¤¢¤¤¤¤¤¤', 0, 2) -x(/¤¢+|¤¤*/, 'a¤¢¤¤¤¤¤¤', 0, 0) -n(/¤¢+|¤¤+/, '') -x(/(¤¢|¤¤)?/, '¤¤', 0, 2) -x(/(¤¢|¤¤)*/, '¤¤¤¢', 0, 4) -x(/(¤¢|¤¤)+/, '¤¤¤¢¤¤', 0, 6) -x(/(¤¢¤¤|¤¦¤¢)+/, '¤¦¤¢¤¢¤¤¤¦¤¨', 0, 8) -x(/(¤¢¤¤|¤¦¤¨)+/, '¤¦¤¢¤¢¤¤¤¦¤¨', 4, 12) -x(/(¤¢¤¤|¤¦¤¢)+/, '¤¢¤¢¤¤¤¦¤¢', 2, 10) -x(/(¤¢¤¤|¤¦¤¢)+/, '¤¢¤¤¤ò¤¦¤¢', 0, 4) -x(/(¤¢¤¤|¤¦¤¢)+/, '$$zzzz¤¢¤¤¤ò¤¦¤¢', 6, 10) -x(/(¤¢|¤¤¤¢¤¤)+/, '¤¢¤¤¤¢¤¤¤¢', 0, 10) -x(/(¤¢|¤¤¤¢¤¤)+/, '¤¤¤¢', 2, 4) -x(/(¤¢|¤¤¤¢¤¤)+/, '¤¤¤¢¤¢¤¢¤¤¤¢', 2, 8) -x(/(?:¤¢|¤¤)(?:¤¢|¤¤)/, '¤¢¤¤', 0, 4) -x(/(?:¤¢*|¤¤*)(?:¤¢*|¤¤*)/, '¤¢¤¢¤¢¤¤¤¤¤¤', 0, 6) -x(/(?:¤¢*|¤¤*)(?:¤¢+|¤¤+)/, '¤¢¤¢¤¢¤¤¤¤¤¤', 0, 12) -x(/(?:¤¢+|¤¤+){2}/, '¤¢¤¢¤¢¤¤¤¤¤¤', 0, 12) -x(/(?:¤¢+|¤¤+){1,2}/, '¤¢¤¢¤¢¤¤¤¤¤¤', 0, 12) -x(/(?:¤¢+|\A¤¤*)¤¦¤¦/, '¤¦¤¦', 0, 4) -n(/(?:¤¢+|\A¤¤*)¤¦¤¦/, '¤¢¤¤¤¦¤¦') -x(/(?:^¤¢+|¤¤+)*¤¦/, '¤¢¤¢¤¤¤¤¤¤¤¢¤¤¤¦', 12, 16) -x(/(?:^¤¢+|¤¤+)*¤¦/, '¤¢¤¢¤¤¤¤¤¤¤¤¤¦', 0, 14) -x(/¤¦{0,}/, '¤¦¤¦¤¦¤¦', 0, 8) -x(/¤¢|(?i)c/, 'C', 0, 1) -x(/(?i)c|¤¢/, 'C', 0, 1) -x(/(?i:¤¢)|a/, 'a', 0, 1) -n(/(?i:¤¢)|a/, 'A') -x(/[¤¢¤¤¤¦]?/, '¤¢¤¤¤¦', 0, 2) -x(/[¤¢¤¤¤¦]*/, '¤¢¤¤¤¦', 0, 6) -x(/[^¤¢¤¤¤¦]*/, '¤¢¤¤¤¦', 0, 0) -n(/[^¤¢¤¤¤¦]+/, '¤¢¤¤¤¦') -x(/¤¢??/, '¤¢¤¢¤¢', 0, 0) -x(/¤¤¤¢??¤¤/, '¤¤¤¢¤¤', 0, 6) -x(/¤¢*?/, '¤¢¤¢¤¢', 0, 0) -x(/¤¤¤¢*?/, 
'¤¤¤¢¤¢', 0, 2) -x(/¤¤¤¢*?¤¤/, '¤¤¤¢¤¢¤¤', 0, 8) -x(/¤¢+?/, '¤¢¤¢¤¢', 0, 2) -x(/¤¤¤¢+?/, '¤¤¤¢¤¢', 0, 4) -x(/¤¤¤¢+?¤¤/, '¤¤¤¢¤¢¤¤', 0, 8) -x(/(?:Å·?)??/, 'Å·', 0, 0) -x(/(?:Å·??)?/, 'Å·', 0, 0) -x(/(?:Ì´?)+?/, 'Ì´Ì´Ì´', 0, 2) -x(/(?:É÷+)??/, 'É÷É÷É÷', 0, 0) -x(/(?:Àã+)??Áú/, 'ÀãÀãÀãÁú', 0, 8) -i(/(?:¤¢¤¤)?{2}/, '', 0, 0) # GNU regex bug -x(/(?:µ´¼Ö)?{2}/, 'µ´¼Öµ´¼Öµ´', 0, 8) -x(/(?:µ´¼Ö)*{0}/, 'µ´¼Öµ´¼Öµ´', 0, 0) -x(/(?:µ´¼Ö){3,}/, 'µ´¼Öµ´¼Öµ´¼Öµ´¼Ö', 0, 16) -n(/(?:µ´¼Ö){3,}/, 'µ´¼Öµ´¼Ö') -x(/(?:µ´¼Ö){2,4}/, 'µ´¼Öµ´¼Öµ´¼Ö', 0, 12) -x(/(?:µ´¼Ö){2,4}/, 'µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö', 0, 16) -x(/(?:µ´¼Ö){2,4}?/, 'µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö', 0, 8) -x(/(?:µ´¼Ö){,}/, 'µ´¼Ö{,}', 0, 7) -x(/(?:¤«¤­¤¯)+?{2}/, '¤«¤­¤¯¤«¤­¤¯¤«¤­¤¯', 0, 12) -x(/(²Ð)/, '²Ð', 0, 2, 1) -x(/(²Ð¿å)/, '²Ð¿å', 0, 4, 1) -x(/((»þ´Ö))/, '»þ´Ö', 0, 4) -x(/((É÷¿å))/, 'É÷¿å', 0, 4, 1) -x(/((ºòÆü))/, 'ºòÆü', 0, 4, 2) -x(/((((((((((((((((((((ÎÌ»Ò))))))))))))))))))))/, 'ÎÌ»Ò', 0, 4, 20) -x(/(¤¢¤¤)(¤¦¤¨)/, '¤¢¤¤¤¦¤¨', 0, 4, 1) -x(/(¤¢¤¤)(¤¦¤¨)/, '¤¢¤¤¤¦¤¨', 4, 8, 2) -x(/()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤­¤¯¤±¤³/, '¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³', 6, 12, 3) -x(/(()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤­¤¯¤±¤³)/, '¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³', 6, 12, 4) -x(/.*(¥Õ¥©)¥ó¡¦¥Þ(¥ó()¥·¥å¥¿)¥¤¥ó/, '¥Õ¥©¥ó¡¦¥Þ¥ó¥·¥å¥¿¥¤¥ó', 10, 18, 2) -x(/(^¤¢)/, '¤¢', 0, 2) -x(/(¤¢)|(¤¢)/, '¤¤¤¢', 2, 4, 1) -x(/(^¤¢)|(¤¢)/, '¤¤¤¢', 2, 4, 2) -x(/(¤¢?)/, '¤¢¤¢¤¢', 0, 2, 1) -x(/(¤Þ*)/, '¤Þ¤Þ¤Þ', 0, 6, 1) -x(/(¤È*)/, '', 0, 0, 1) -x(/(¤ë+)/, '¤ë¤ë¤ë¤ë¤ë¤ë¤ë', 0, 14, 1) -x(/(¤Õ+|¤Ø*)/, '¤Õ¤Õ¤Õ¤Ø¤Ø', 0, 6, 1) -x(/(¤¢+|¤¤?)/, '¤¤¤¤¤¤¤¢¤¢', 0, 2, 1) -x(/(¤¢¤¤¤¦)?/, '¤¢¤¤¤¦', 0, 6, 1) -x(/(¤¢¤¤¤¦)*/, '¤¢¤¤¤¦', 0, 6, 1) -x(/(¤¢¤¤¤¦)+/, '¤¢¤¤¤¦', 0, 6, 1) -x(/(¤µ¤·¤¹|¤¢¤¤¤¦)+/, '¤¢¤¤¤¦', 0, 6, 1) -x(/([¤Ê¤Ë¤Ì][¤«¤­¤¯]|¤«¤­¤¯)+/, '¤«¤­¤¯', 0, 6, 1) -x(/((?i:¤¢¤¤¤¦))/, '¤¢¤¤¤¦', 0, 6, 1) -x(/((?m:¤¢.¤¦))/, "¤¢\n¤¦", 0, 5, 1) -x(/((?=¤¢¤ó)¤¢)/, '¤¢¤ó¤¤', 0, 2, 1) -x(/¤¢¤¤¤¦|(.¤¢¤¤¤¨)/, '¤ó¤¢¤¤¤¨', 0, 8, 1) -x(/¤¢*(.)/, '¤¢¤¢¤¢¤¢¤ó', 8, 10, 1) -x(/¤¢*?(.)/, '¤¢¤¢¤¢¤¢¤ó', 0, 2, 1) -x(/¤¢*?(¤ó)/, '¤¢¤¢¤¢¤¢¤ó', 8, 10, 1) 
-x(/[¤¤¤¦¤¨]¤¢*(.)/, '¤¨¤¢¤¢¤¢¤¢¤ó', 10, 12, 1) -x(/(\A¤¤¤¤)¤¦¤¦/, '¤¤¤¤¤¦¤¦', 0, 4, 1) -n(/(\A¤¤¤¤)¤¦¤¦/, '¤ó¤¤¤¤¤¦¤¦') -x(/(^¤¤¤¤)¤¦¤¦/, '¤¤¤¤¤¦¤¦', 0, 4, 1) -n(/(^¤¤¤¤)¤¦¤¦/, '¤ó¤¤¤¤¤¦¤¦') -x(/¤í¤í(¤ë¤ë$)/, '¤í¤í¤ë¤ë', 4, 8, 1) -n(/¤í¤í(¤ë¤ë$)/, '¤í¤í¤ë¤ë¤ë') -x(/(̵)\1/, '̵̵', 0, 4) -n(/(̵)\1/, '̵Éð') -x(/(¶õ?)\1/, '¶õ¶õ', 0, 4) -x(/(¶õ??)\1/, '¶õ¶õ', 0, 0) -x(/(¶õ*)\1/, '¶õ¶õ¶õ¶õ¶õ', 0, 8) -x(/(¶õ*)\1/, '¶õ¶õ¶õ¶õ¶õ', 0, 4, 1) -x(/¤¢(¤¤*)\1/, '¤¢¤¤¤¤¤¤¤¤', 0, 10) -x(/¤¢(¤¤*)\1/, '¤¢¤¤', 0, 2) -x(/(¤¢*)(¤¤*)\1\2/, '¤¢¤¢¤¢¤¤¤¤¤¢¤¢¤¢¤¤¤¤', 0, 20) -x(/(¤¢*)(¤¤*)\2/, '¤¢¤¢¤¢¤¤¤¤¤¤¤¤', 0, 14) -x(/(¤¢*)(¤¤*)\2/, '¤¢¤¢¤¢¤¤¤¤¤¤¤¤', 6, 10, 2) -x(/(((((((¤Ý*)¤Ú))))))¤Ô\7/, '¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý', 0, 16) -x(/(((((((¤Ý*)¤Ú))))))¤Ô\7/, '¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý', 0, 6, 7) -x(/(¤Ï)(¤Ò)(¤Õ)\2\1\3/, '¤Ï¤Ò¤Õ¤Ò¤Ï¤Õ', 0, 12) -x(/([¤­-¤±])\1/, '¤¯¤¯', 0, 4) -x(/(\w\d\s)\1/, '¤¢5 ¤¢5 ', 0, 8) -n(/(\w\d\s)\1/, '¤¢5 ¤¢5') -x(/(ï¡©|[¤¢-¤¦]{3})\1/, 'ï¡©', 0, 8) -x(/...(ï¡©|[¤¢-¤¦]{3})\1/, '¤¢a¤¢Ã¯¡©Ã¯¡©', 0, 13) -x(/(ï¡©|[¤¢-¤¦]{3})\1/, '¤¦¤¤¤¦¤¦¤¤¤¦', 0, 12) -x(/(^¤³)\1/, '¤³¤³', 0, 4) -n(/(^¤à)\1/, '¤á¤à¤à') -n(/(¤¢$)\1/, '¤¢¤¢') -n(/(¤¢¤¤\Z)\1/, '¤¢¤¤') -x(/(¤¢*\Z)\1/, '¤¢', 2, 2) -x(/.(¤¢*\Z)\1/, '¤¤¤¢', 2, 4) -x(/(.(¤ä¤¤¤æ)\2)/, 'z¤ä¤¤¤æ¤ä¤¤¤æ', 0, 13, 1) -x(/(.(..\d.)\2)/, '¤¢12341234', 0, 10, 1) -x(/((?i:¤¢v¤º))\1/, '¤¢v¤º¤¢v¤º', 0, 10) -x(/(?<¶ò¤«>ÊÑ|\(\g<¶ò¤«>\))/, '((((((ÊÑ))))))', 0, 14) -x(/\A(?:\g<°¤-1>|\g<±¾-2>|\z½ªÎ» (?<°¤-1>´Ñ|¼«\g<±¾-2>¼«)(?<±¾-2>ºß|Êî»§\g<°¤-1>Êî»§))$/, 'Êî»§¼«Êî»§¼«ºß¼«Êî»§¼«Êî»§', 0, 26) -x(/[[¤Ò¤Õ]]/, '¤Õ', 0, 2) -x(/[[¤¤¤ª¤¦]¤«]/, '¤«', 0, 2) -n(/[[^¤¢]]/, '¤¢') -n(/[^[¤¢]]/, '¤¢') -x(/[^[^¤¢]]/, '¤¢', 0, 2) -x(/[[¤«¤­¤¯]&&¤­¤¯]/, '¤¯', 0, 2) -n(/[[¤«¤­¤¯]&&¤­¤¯]/, '¤«') -n(/[[¤«¤­¤¯]&&¤­¤¯]/, '¤±') -x(/[¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]/, '¤ñ', 0, 2) -n(/[^¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]/, '¤ñ') -x(/[[^¤¢&&¤¢]&&¤¢-¤ó]/, '¤¤', 0, 2) -n(/[[^¤¢&&¤¢]&&¤¢-¤ó]/, '¤¢') -x(/[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]/, '¤­', 0, 2) -n(/[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]/, '¤¤') -x(/[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]/, 
'¤¦', 0, 2) -x(/[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]/, '¤¨', 0, 2) -n(/[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]/, '¤«') -x(/[¤¢-&&-¤¢]/, '-', 0, 1) -x(/[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]q-w]/, '¤¨', 0, 2) -x(/[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]/, 'f', 0, 1) -x(/[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]/, 'g', 0, 1) -n(/[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]/, '2') -r(/¤¢/, '¤¢', 0) -r(/¤¢/, '¤¢', 0, 2) -r(/¤¤/, '¤¢¤¤¤¦', 2) -r(/¤¤/, '¤¢¤¤¤¦', 2, 4) -r(/./, '¤¢', 0) -r(/.*/, '¤¢¤¤¤¦¤¨¤ª ¤«¤­¤¯', 17) -r(/.*¤¨¤ª/, '¤¢¤¤¤¦¤¨¤ª ¤«¤­¤¯', 6) -r(/¤¢*/, '¤¢¤¢¤¢¤¤¤¤¤¦', 12) -r(/¤¢+/, '¤¢¤¢¤¢¤¤¤¤¤¦', 4) -r(/¤¢?/, '¤¤¤¢¤¦', 6) -r(/Á´??/, 'ÉéÁ´ÊÑ', 6) -r(/aÊÕc´Áe/, 'aÊÕc´Áeavcd', 0) -r(/\w\d\s/, ' ¤¢2 ¤¦¤¦ $3 ', 2) -r(/[¤¦-¤ª]¤¢¤¢[¤È-¤ó]/, '3¤¦¤¢¤¢¤Ê¤¦¤¢¤¢¤¢', 1) -r(/¤¢|¤¤/, '¤¤', 0) -r(/¤¢¤¤|¤¤¤¦|¤¦¤¨/, '¤¤¤¦¤¦', 0) -r(/(¤È¤È¤Á)\1/, '¤È¤È¤Á¤È¤È¤Á¤È¤È¤Á', 6) -r(/|¤¨/, '¤¨', 2) -r(/^¤¢¤º/, '¤¢¤º¤¢¤º', 0) -r(/¤¢¤º$/, '¤¢¤º¤¢¤º', 4) -r(/(((.¤¢)))\3/, 'z¤¢z¤¢¤¢¤¢', 0) -r(/(¤¢¤¦*?¤ó)\1/, '¤¢¤¢¤¦¤¦¤ó¤¢¤¦¤¦¤ó¤¢¤¦¤ó', 2) -r(/¤¢¤¢¤ó{3,4}/, '¤Æ¤Æ¤¢¤¢¤¤¤¤¤¢¤¢¤ó¤ó¤ó¤¢¤¢¤ó¤¢¤¢¤ó', 12) -r(/\000¤¢/, "¤¤\000¤¢", 2) -r(/¤È¤È\xfe\xfe/, "¤È¤È¤È\xfe\xfe", 2) -r(/...¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³¤µ¤·¤¹¤»¤½/, 'zzzzz¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³¤µ¤·¤¹¤»¤½', 2) -end - -test_sb('ASCII') -test_sb('EUC') -test_sb('SJIS') -test_sb('UTF8') -test_euc('EUC') - - -# UTF-8 (by UENO Katsuhiro) -$KCODE = 'UTF-8' - -s = "\xe3\x81\x82\xe3\x81\x81\xf0\x90\x80\x85\xe3\x81\x8a\xe3\x81\x85" -x(/[\xc2\x80-\xed\x9f\xbf]+/u, s, 0, 6) - -s = "\xf0\x90\x80\x85\xe3\x81\x82" -x(/[\xc2\x80-\xed\x9f\xbf]/u, s, 4, 7) - -s = "\xed\x9f\xbf" -n(/[\xc2\x80-\xed\x9f\xbe]/u, s) - -s = "\xed\x9f\xbf" -n(/[\xc2\x80-\xed\x9f\xbe]/u, s) - -s = "\xed\x9f\xbf" -n(/[\xc2\x80-\xed\x9f\xbe]/u, s) - -s = "\xed\x9f\xbf" -n(/[\xc3\xad\xed\x9f\xbe]/u, s) - -s = "\xed\x9f\xbf" -n(/[\xc4\x80-\xed\x9f\xbe]/u, s) - -s = "\xed\x9f\xbf\xf0\x90\x80\x85\xed\x9f\xbf" -x(/[^\xc2\x80-\xed\x9f\xbe]/u, s, 0, 3) - -s = "\xed\x9f\xbf" -x(/[^\xc3\xad\xed\x9f\xbe]/u, s, 0, 3) - -s = 
"\xed\x9f\xbf\xf0\x90\x80\x85\xed\x9f\xbf" -x(/[^\xc4\x80-\xed\x9f\xbe]/u, s, 0, 3) - -s = "\xc3\xbe\xc3\xbf" -n(/[\xfe\xff\xc3\x80]/u, s) - - -# Japanese long text. -$KCODE = 'EUC' - -s = <", "aa", 0, 2); x2("(?a)", "a", 0, 1); - x2("(?ab)\\1", "abab", 0, 4); - x2("(?.zv.)\\k", "azvbazvb", 0, 8); + x2("(?ab)\\g", "abab", 0, 4); + x2("(?.zv.)\\k", "azvbazvb", 0, 8); x2("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); x2("(?|a\\g)+", "", 0, 0); x2("(?|\\(\\g\\))+$", "()(())", 0, 6); @@ -503,13 +522,13 @@ extern int main(int argc, char* argv[]) x2("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); x2("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); x2("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); - x2("(?<@:name[1240]>\\w+\\sx)a+\\k<@:name[1240]>", " fg xaaaaaaaafg x", 2, 18); - x3("(z)()()(?<9>a)\\4", "zaa", 1, 2, 4); - x2("(.)(((?<*>a)))\\k<*>", "zaa", 0, 3); + x2("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18); + x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); + x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); x2("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); x2("(?:(?)|(?efg))\\k", "", 0, 0); - x2("(?:(?<@x>abc)|(?<@x>efg))\\k<@x>", "abcefgefg", 3, 9); - n("(?:(?<@x>abc)|(?<@x>efg))\\k<@x>", "abcefg"); + x2("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); + n("(?:(?abc)|(?efg))\\k", "abcefg"); x2("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); x3("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); x3("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); @@ -518,286 +537,296 @@ extern int main(int argc, char* argv[]) x3("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); x2("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); - 
x3("\\g<2>\\g<1>|\\zEND(.a.)(?.b.)", "xbxyay", 3, 6, 1); + x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); x2("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); x2("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); x2("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); x2("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); - x2("", "‚ ", 0, 0); - x2("‚ ", "‚ ", 0, 2); - n("‚¢", "‚ "); - x2("‚¤‚¤", "‚¤‚¤", 0, 4); - x2("‚ ‚¢‚¤", "‚ ‚¢‚¤", 0, 6); - x2("‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±", "‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±", 0, 70); - x2("‚ ", "‚¢‚ ", 2, 4); - x2("‚¢‚¤", "‚ ‚¢‚¤", 2, 6); + x2("()*\\1", "", 0, 0); + x2("(?:()|())*\\1\\2", "", 0, 0); + x3("(?:\\1a|())*", "a", 0, 0, 1); + x2("x((.)*)*x", "0x1x2x3", 1, 6); + x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); + x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); + x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); + x2("", "¤¢", 0, 0); + x2("¤¢", "¤¢", 0, 2); + n("¤¤", "¤¢"); + x2("¤¦¤¦", "¤¦¤¦", 0, 4); + x2("¤¢¤¤¤¦", "¤¢¤¤¤¦", 0, 6); + x2("¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³", "¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³", 0, 70); + x2("¤¢", "¤¤¤¢", 2, 4); + x2("¤¤¤¦", "¤¢¤¤¤¦", 2, 6); x2("\\xca\\xb8", "\xca\xb8", 0, 2); - x2(".", "‚ ", 0, 2); - x2("..", "‚©‚«", 0, 4); - x2("\\w", "‚¨", 0, 2); - n("\\W", "‚ "); - x2("\\S", "‚»", 0, 2); - x2("\\S", "Š¿", 0, 2); - x2("\\b", "‹C ", 0, 0); - x2("\\b", " ‚Ù", 1, 1); - x2("\\B", "‚¹‚» ", 2, 2); - x2("\\B", "‚¤ ", 3, 3); - x2("\\B", " ‚¢", 0, 0); - x2("[‚½‚¿]", "‚¿", 0, 2); - n("[‚È‚É]", "‚Ê"); - x2("[‚¤-‚¨]", "‚¦", 0, 2); - n("[^‚¯]", "‚¯"); - x2("[\\w]", "‚Ë", 0, 2); - n("[\\d]", "‚Ó"); - x2("[\\D]", "‚Í", 0, 2); - n("[\\s]", "‚­"); - x2("[\\S]", "‚Ö", 0, 2); - x2("[\\w\\d]", "‚æ", 0, 2); - x2("[\\w\\d]", " ‚æ", 3, 5); - n("\\w‹SŽÔ", " ‹SŽÔ"); - x2("‹S\\WŽÔ", "‹S ŽÔ", 0, 5); - x2("‚ .‚¢.‚¤", "‚ ‚ ‚¢‚¢‚¤", 0, 10); - x2(".\\w‚¤\\W..‚¼", 
"‚¦‚¤‚¤ ‚¤‚¼‚¼", 0, 13); - x2("\\s\\w‚±‚±‚±", " ‚±‚±‚±‚±", 0, 9); - x2("‚ ‚ .‚¯", "‚ ‚ ‚¯‚¯", 0, 8); - n(".‚¢", "‚¢‚¦"); - x2(".‚¨", "‚¨‚¨", 0, 4); - x2("^‚ ", "‚ ", 0, 2); - x2("^‚Þ$", "‚Þ", 0, 2); - x2("^\\w$", "‚É", 0, 2); - x2("^\\w‚©‚«‚­‚¯‚±$", "z‚©‚«‚­‚¯‚±", 0, 11); - x2("^\\w...‚¤‚¦‚¨$", "z‚ ‚¢‚¤‚¤‚¦‚¨", 0, 13); - x2("\\w\\w\\s\\W‚¨‚¨‚¨\\d", "a‚¨ ‚¨‚¨‚¨4", 0, 12); - x2("\\A‚½‚¿‚Â", "‚½‚¿‚Â", 0, 6); - x2("‚Þ‚ß‚à\\Z", "‚Þ‚ß‚à", 0, 6); - x2("‚©‚«‚­\\z", "‚©‚«‚­", 0, 6); - x2("‚©‚«‚­\\Z", "‚©‚«‚­\n", 0, 6); - x2("\\G‚Û‚Ò", "‚Û‚Ò", 0, 4); - n("\\G‚¦", "‚¤‚¦‚¨"); - n("‚Æ‚Ä\\G", "‚Æ‚Ä"); - n("‚Ü‚Ý\\A", "‚Ü‚Ý"); - n("‚Ü\\A‚Ý", "‚Ü‚Ý"); - x2("(?=‚¹)‚¹", "‚¹", 0, 2); - n("(?=‚¤).", "‚¢"); - x2("(?!‚¤)‚©", "‚©", 0, 2); - n("(?!‚Æ)‚ ", "‚Æ"); - x2("(?i:‚ )", "‚ ", 0, 2); - x2("(?i:‚Ô‚×)", "‚Ô‚×", 0, 4); - n("(?i:‚¢)", "‚¤"); - x2("(?m:‚æ.)", "‚æ\n", 0, 3); - x2("(?m:.‚ß)", "‚Ü\n‚ß", 2, 5); - x2("‚ ?", "", 0, 0); - x2("•Ï?", "‰»", 0, 0); - x2("•Ï?", "•Ï", 0, 2); - x2("—Ê*", "", 0, 0); - x2("—Ê*", "—Ê", 0, 2); - x2("Žq*", "ŽqŽqŽq", 0, 6); - x2("”n*", "Ž­”n”n”n”n", 0, 0); - n("ŽR+", ""); - x2("‰Í+", "‰Í", 0, 2); - x2("Žž+", "ŽžŽžŽžŽž", 0, 8); - x2("‚¦+", "‚¦‚¦‚¤‚¤‚¤", 0, 4); - x2("‚¤+", "‚¨‚¤‚¤‚¤‚¤", 2, 10); - x2(".?", "‚½", 0, 2); - x2(".*", "‚ς҂ՂØ", 0, 8); - x2(".+", "‚ë", 0, 2); - x2(".+", "‚¢‚¤‚¦‚©\n", 0, 8); - x2("‚ |‚¢", "‚ ", 0, 2); - x2("‚ |‚¢", "‚¢", 0, 2); - x2("‚ ‚¢|‚¢‚¤", "‚ ‚¢", 0, 4); - x2("‚ ‚¢|‚¢‚¤", "‚¢‚¤", 0, 4); - x2("‚ð(?:‚©‚«|‚«‚­)", "‚ð‚©‚«", 0, 6); - x2("‚ð(?:‚©‚«|‚«‚­)‚¯", "‚ð‚«‚­‚¯", 0, 8); - x2("‚ ‚¢|(?:‚ ‚¤|‚ ‚ð)", "‚ ‚ð", 0, 4); - x2("‚ |‚¢|‚¤", "‚¦‚¤", 2, 4); - x2("‚ |‚¢|‚¤‚¦|‚¨‚©‚«|‚­|‚¯‚±‚³|‚µ‚·‚¹|‚»|‚½‚¿|‚‚ĂƂȂÉ|‚Ê‚Ë", "‚µ‚·‚¹", 0, 6); - n("‚ |‚¢|‚¤‚¦|‚¨‚©‚«|‚­|‚¯‚±‚³|‚µ‚·‚¹|‚»|‚½‚¿|‚‚ĂƂȂÉ|‚Ê‚Ë", "‚·‚¹"); - x2("‚ |^‚í", "‚Ô‚ ", 2, 4); - x2("‚ |^‚ð", "‚ð‚ ", 0, 2); - x2("‹S|\\GŽÔ", "‚¯ŽÔ‹S", 4, 6); - x2("‹S|\\GŽÔ", "ŽÔ‹S", 0, 2); - x2("‹S|\\AŽÔ", "bŽÔ‹S", 3, 5); - x2("‹S|\\AŽÔ", "ŽÔ", 0, 2); - x2("‹S|ŽÔ\\Z", "ŽÔ‹S", 2, 4); - x2("‹S|ŽÔ\\Z", 
"ŽÔ", 0, 2); - x2("‹S|ŽÔ\\Z", "ŽÔ\n", 0, 2); - x2("‹S|ŽÔ\\z", "ŽÔ‹S", 2, 4); - x2("‹S|ŽÔ\\z", "ŽÔ", 0, 2); - x2("\\w|\\s", "‚¨", 0, 2); - x2("\\w|%", "%‚¨", 0, 1); - x2("\\w|[&$]", "‚¤&", 0, 2); - x2("[‚¢-‚¯]", "‚¤", 0, 2); - x2("[‚¢-‚¯]|[^‚©-‚±]", "‚ ", 0, 2); - x2("[‚¢-‚¯]|[^‚©-‚±]", "‚©", 0, 2); - x2("(?:‚ |[‚¤-‚«])|‚¢‚ð", "‚¤‚ð", 0, 2); - x2("(?:‚ |[‚¤-‚«])|‚¢‚ð", "‚¢‚ð", 0, 4); - x2("‚ ‚¢‚¤|(?=‚¯‚¯)..‚Ù", "‚¯‚¯‚Ù", 0, 6); - x2("‚ ‚¢‚¤|(?!‚¯‚¯)..‚Ù", "‚ ‚¢‚Ù", 0, 6); - x2("(?=‚ð‚ )..‚ |(?=‚ð‚ð)..‚ ", "‚ð‚ð‚ ", 0, 6); - x2("(?<=‚ |‚¢‚¤)‚¢", "‚¢‚¤‚¢", 4, 6); - n("(?>‚ |‚ ‚¢‚¦)‚¤", "‚ ‚¢‚¦‚¤"); - x2("(?>‚ ‚¢‚¦|‚ )‚¤", "‚ ‚¢‚¦‚¤", 0, 8); - x2("‚ ?|‚¢", "‚ ", 0, 2); - x2("‚ ?|‚¢", "‚¢", 0, 0); - x2("‚ ?|‚¢", "", 0, 0); - x2("‚ *|‚¢", "‚ ‚ ", 0, 4); - x2("‚ *|‚¢*", "‚¢‚ ", 0, 0); - x2("‚ *|‚¢*", "‚ ‚¢", 0, 2); - x2("[a‚ ]*|‚¢*", "a‚ ‚¢‚¢‚¢", 0, 3); - x2("‚ +|‚¢*", "", 0, 0); - x2("‚ +|‚¢*", "‚¢‚¢‚¢", 0, 6); - x2("‚ +|‚¢*", "‚ ‚¢‚¢‚¢", 0, 2); - x2("‚ +|‚¢*", "a‚ ‚¢‚¢‚¢", 0, 0); - n("‚ +|‚¢+", ""); - x2("(‚ |‚¢)?", "‚¢", 0, 2); - x2("(‚ |‚¢)*", "‚¢‚ ", 0, 4); - x2("(‚ |‚¢)+", "‚¢‚ ‚¢", 0, 6); - x2("(‚ ‚¢|‚¤‚ )+", "‚¤‚ ‚ ‚¢‚¤‚¦", 0, 8); - x2("(‚ ‚¢|‚¤‚¦)+", "‚¤‚ ‚ ‚¢‚¤‚¦", 4, 12); - x2("(‚ ‚¢|‚¤‚ )+", "‚ ‚ ‚¢‚¤‚ ", 2, 10); - x2("(‚ ‚¢|‚¤‚ )+", "‚ ‚¢‚ð‚¤‚ ", 0, 4); - x2("(‚ ‚¢|‚¤‚ )+", "$$zzzz‚ ‚¢‚ð‚¤‚ ", 6, 10); - x2("(‚ |‚¢‚ ‚¢)+", "‚ ‚¢‚ ‚¢‚ ", 0, 10); - x2("(‚ |‚¢‚ ‚¢)+", "‚¢‚ ", 2, 4); - x2("(‚ |‚¢‚ ‚¢)+", "‚¢‚ ‚ ‚ ‚¢‚ ", 2, 8); - x2("(?:‚ |‚¢)(?:‚ |‚¢)", "‚ ‚¢", 0, 4); - x2("(?:‚ *|‚¢*)(?:‚ *|‚¢*)", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 6); - x2("(?:‚ *|‚¢*)(?:‚ +|‚¢+)", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12); - x2("(?:‚ +|‚¢+){2}", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12); - x2("(?:‚ +|‚¢+){1,2}", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12); - x2("(?:‚ +|\\A‚¢*)‚¤‚¤", "‚¤‚¤", 0, 4); - n("(?:‚ +|\\A‚¢*)‚¤‚¤", "‚ ‚¢‚¤‚¤"); - x2("(?:^‚ +|‚¢+)*‚¤", "‚ ‚ ‚¢‚¢‚¢‚ ‚¢‚¤", 12, 16); - x2("(?:^‚ +|‚¢+)*‚¤", "‚ ‚ ‚¢‚¢‚¢‚¢‚¤", 0, 14); - x2("‚¤{0,}", "‚¤‚¤‚¤‚¤", 0, 8); - x2("‚ |(?i)c", "C", 0, 1); - x2("(?i)c|‚ ", "C", 0, 1); - x2("(?i:‚ )|a", "a", 
0, 1); - n("(?i:‚ )|a", "A"); - x2("[‚ ‚¢‚¤]?", "‚ ‚¢‚¤", 0, 2); - x2("[‚ ‚¢‚¤]*", "‚ ‚¢‚¤", 0, 6); - x2("[^‚ ‚¢‚¤]*", "‚ ‚¢‚¤", 0, 0); - n("[^‚ ‚¢‚¤]+", "‚ ‚¢‚¤"); - x2("‚ ?\?", "‚ ‚ ‚ ", 0, 0); - x2("‚¢‚ ?\?‚¢", "‚¢‚ ‚¢", 0, 6); - x2("‚ *?", "‚ ‚ ‚ ", 0, 0); - x2("‚¢‚ *?", "‚¢‚ ‚ ", 0, 2); - x2("‚¢‚ *?‚¢", "‚¢‚ ‚ ‚¢", 0, 8); - x2("‚ +?", "‚ ‚ ‚ ", 0, 2); - x2("‚¢‚ +?", "‚¢‚ ‚ ", 0, 4); - x2("‚¢‚ +?‚¢", "‚¢‚ ‚ ‚¢", 0, 8); - x2("(?:“V?)?\?", "“V", 0, 0); - x2("(?:“V?\?)?", "“V", 0, 0); - x2("(?:–²?)+?", "–²–²–²", 0, 2); - x2("(?:•—+)?\?", "•—•—•—", 0, 0); - x2("(?:á+)?\?‘š", "ááá‘š", 0, 8); - x2("(?:‚ ‚¢)?{2}", "", 0, 0); - x2("(?:‹SŽÔ)?{2}", "‹SŽÔ‹SŽÔ‹S", 0, 8); - x2("(?:‹SŽÔ)*{0}", "‹SŽÔ‹SŽÔ‹S", 0, 0); - x2("(?:‹SŽÔ){3,}", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 16); - n("(?:‹SŽÔ){3,}", "‹SŽÔ‹SŽÔ"); - x2("(?:‹SŽÔ){2,4}", "‹SŽÔ‹SŽÔ‹SŽÔ", 0, 12); - x2("(?:‹SŽÔ){2,4}", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 16); - x2("(?:‹SŽÔ){2,4}?", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 8); - x2("(?:‹SŽÔ){,}", "‹SŽÔ{,}", 0, 7); - x2("(?:‚©‚«‚­)+?{2}", "‚©‚«‚­‚©‚«‚­‚©‚«‚­", 0, 12); - x3("(‰Î)", "‰Î", 0, 2, 1); - x3("(‰Î…)", "‰Î…", 0, 4, 1); - x2("((ŽžŠÔ))", "ŽžŠÔ", 0, 4); - x3("((•—…))", "•—…", 0, 4, 1); - x3("((ð“ú))", "ð“ú", 0, 4, 2); - x3("((((((((((((((((((((—ÊŽq))))))))))))))))))))", "—ÊŽq", 0, 4, 20); - x3("(‚ ‚¢)(‚¤‚¦)", "‚ ‚¢‚¤‚¦", 0, 4, 1); - x3("(‚ ‚¢)(‚¤‚¦)", "‚ ‚¢‚¤‚¦", 4, 8, 2); - x3("()(‚ )‚¢‚¤(‚¦‚¨‚©)‚«‚­‚¯‚±", "‚ ‚¢‚¤‚¦‚¨‚©‚«‚­‚¯‚±", 6, 12, 3); - x3("(()(‚ )‚¢‚¤(‚¦‚¨‚©)‚«‚­‚¯‚±)", "‚ ‚¢‚¤‚¦‚¨‚©‚«‚­‚¯‚±", 6, 12, 4); - x3(".*(ƒtƒH)ƒ“Eƒ}(ƒ“()ƒVƒ…ƒ^)ƒCƒ“", "ƒtƒHƒ“Eƒ}ƒ“ƒVƒ…ƒ^ƒCƒ“", 10, 18, 2); - x2("(^‚ )", "‚ ", 0, 2); - x3("(‚ )|(‚ )", "‚¢‚ ", 2, 4, 1); - x3("(^‚ )|(‚ )", "‚¢‚ ", 2, 4, 2); - x3("(‚ ?)", "‚ ‚ ‚ ", 0, 2, 1); - x3("(‚Ü*)", "‚܂܂Ü", 0, 6, 1); - x3("(‚Æ*)", "", 0, 0, 1); - x3("(‚é+)", "‚é‚é‚é‚é‚é‚é‚é", 0, 14, 1); - x3("(‚Ó+|‚Ö*)", "‚Ó‚Ó‚Ó‚Ö‚Ö", 0, 6, 1); - x3("(‚ +|‚¢?)", "‚¢‚¢‚¢‚ ‚ ", 0, 2, 1); - x3("(‚ ‚¢‚¤)?", "‚ ‚¢‚¤", 0, 6, 1); - x3("(‚ ‚¢‚¤)*", "‚ ‚¢‚¤", 0, 6, 1); - x3("(‚ 
‚¢‚¤)+", "‚ ‚¢‚¤", 0, 6, 1); - x3("(‚³‚µ‚·|‚ ‚¢‚¤)+", "‚ ‚¢‚¤", 0, 6, 1); - x3("([‚ȂɂÊ][‚©‚«‚­]|‚©‚«‚­)+", "‚©‚«‚­", 0, 6, 1); - x3("((?i:‚ ‚¢‚¤))", "‚ ‚¢‚¤", 0, 6, 1); - x3("((?m:‚ .‚¤))", "‚ \n‚¤", 0, 5, 1); - x3("((?=‚ ‚ñ)‚ )", "‚ ‚ñ‚¢", 0, 2, 1); - x3("‚ ‚¢‚¤|(.‚ ‚¢‚¦)", "‚ñ‚ ‚¢‚¦", 0, 8, 1); - x3("‚ *(.)", "‚ ‚ ‚ ‚ ‚ñ", 8, 10, 1); - x3("‚ *?(.)", "‚ ‚ ‚ ‚ ‚ñ", 0, 2, 1); - x3("‚ *?(‚ñ)", "‚ ‚ ‚ ‚ ‚ñ", 8, 10, 1); - x3("[‚¢‚¤‚¦]‚ *(.)", "‚¦‚ ‚ ‚ ‚ ‚ñ", 10, 12, 1); - x3("(\\A‚¢‚¢)‚¤‚¤", "‚¢‚¢‚¤‚¤", 0, 4, 1); - n("(\\A‚¢‚¢)‚¤‚¤", "‚ñ‚¢‚¢‚¤‚¤"); - x3("(^‚¢‚¢)‚¤‚¤", "‚¢‚¢‚¤‚¤", 0, 4, 1); - n("(^‚¢‚¢)‚¤‚¤", "‚ñ‚¢‚¢‚¤‚¤"); - x3("‚ë‚ë(‚é‚é$)", "‚ë‚ë‚é‚é", 4, 8, 1); - n("‚ë‚ë(‚é‚é$)", "‚ë‚ë‚é‚é‚é"); - x2("(–³)\\1", "–³–³", 0, 4); - n("(–³)\\1", "–³•"); - x2("(‹ó?)\\1", "‹ó‹ó", 0, 4); - x2("(‹ó?\?)\\1", "‹ó‹ó", 0, 0); - x2("(‹ó*)\\1", "‹ó‹ó‹ó‹ó‹ó", 0, 8); - x3("(‹ó*)\\1", "‹ó‹ó‹ó‹ó‹ó", 0, 4, 1); - x2("‚ (‚¢*)\\1", "‚ ‚¢‚¢‚¢‚¢", 0, 10); - x2("‚ (‚¢*)\\1", "‚ ‚¢", 0, 2); - x2("(‚ *)(‚¢*)\\1\\2", "‚ ‚ ‚ ‚¢‚¢‚ ‚ ‚ ‚¢‚¢", 0, 20); - x2("(‚ *)(‚¢*)\\2", "‚ ‚ ‚ ‚¢‚¢‚¢‚¢", 0, 14); - x3("(‚ *)(‚¢*)\\2", "‚ ‚ ‚ ‚¢‚¢‚¢‚¢", 6, 10, 2); - x2("(((((((‚Û*)‚Ø))))))‚Ò\\7", "‚ۂۂۂ؂҂ۂۂÛ", 0, 16); - x3("(((((((‚Û*)‚Ø))))))‚Ò\\7", "‚ۂۂۂ؂҂ۂۂÛ", 0, 6, 7); - x2("(‚Í)(‚Ð)(‚Ó)\\2\\1\\3", "‚͂ЂӂЂ͂Ó", 0, 12); - x2("([‚«-‚¯])\\1", "‚­‚­", 0, 4); - x2("(\\w\\d\\s)\\1", "‚ 5 ‚ 5 ", 0, 8); - n("(\\w\\d\\s)\\1", "‚ 5 ‚ 5"); - x2("(’NH|[‚ -‚¤]{3})\\1", "’NH’NH", 0, 8); - x2("...(’NH|[‚ -‚¤]{3})\\1", "‚ a‚ ’NH’NH", 0, 13); - x2("(’NH|[‚ -‚¤]{3})\\1", "‚¤‚¢‚¤‚¤‚¢‚¤", 0, 12); - x2("(^‚±)\\1", "‚±‚±", 0, 4); - n("(^‚Þ)\\1", "‚߂ނÞ"); - n("(‚ $)\\1", "‚ ‚ "); - n("(‚ ‚¢\\Z)\\1", "‚ ‚¢"); - x2("(‚ *\\Z)\\1", "‚ ", 2, 2); - x2(".(‚ *\\Z)\\1", "‚¢‚ ", 2, 4); - x3("(.(‚â‚¢‚ä)\\2)", "z‚â‚¢‚ä‚â‚¢‚ä", 0, 13, 1); - x3("(.(..\\d.)\\2)", "‚ 12341234", 0, 10, 1); - x2("((?i:‚ v‚¸))\\1", "‚ v‚¸‚ v‚¸", 0, 10); - x2("(?<‹ð‚©>•Ï|\\(\\g<‹ð‚©>\\))", "((((((•Ï))))))", 0, 14); - x2("\\A(?:\\g<ˆ¢-1>|\\g<‰]-2>|\\zI—¹ 
(?<ˆ¢-1>ŠÏ|Ž©\\g<‰]-2>Ž©)(?<‰]-2>Ý|•ìŽF\\g<ˆ¢-1>•ìŽF))$", "•ìŽFŽ©•ìŽFŽ©ÝŽ©•ìŽFŽ©•ìŽF", 0, 26); - x2("[[‚ЂÓ]]", "‚Ó", 0, 2); - x2("[[‚¢‚¨‚¤]‚©]", "‚©", 0, 2); - n("[[^‚ ]]", "‚ "); - n("[^[‚ ]]", "‚ "); - x2("[^[^‚ ]]", "‚ ", 0, 2); - x2("[[‚©‚«‚­]&&‚«‚­]", "‚­", 0, 2); - n("[[‚©‚«‚­]&&‚«‚­]", "‚©"); - n("[[‚©‚«‚­]&&‚«‚­]", "‚¯"); - x2("[‚ -‚ñ&&‚¢-‚ð&&‚¤-‚ï]", "‚ï", 0, 2); - n("[^‚ -‚ñ&&‚¢-‚ð&&‚¤-‚ï]", "‚ï"); - x2("[[^‚ &&‚ ]&&‚ -‚ñ]", "‚¢", 0, 2); - n("[[^‚ &&‚ ]&&‚ -‚ñ]", "‚ "); - x2("[[^‚ -‚ñ&&‚¢‚¤‚¦‚¨]&&[^‚¤-‚©]]", "‚«", 0, 2); - n("[[^‚ -‚ñ&&‚¢‚¤‚¦‚¨]&&[^‚¤-‚©]]", "‚¢"); - x2("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚¤", 0, 2); - x2("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚¦", 0, 2); - n("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚©"); - x2("[‚ -&&-‚ ]", "-", 0, 1); - x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]q-w]", "‚¦", 0, 2); - x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "f", 0, 1); - x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "g", 0, 1); - n("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "2"); - fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d\n", nsucc, nfail); + x2(".", "¤¢", 0, 2); + x2("..", "¤«¤­", 0, 4); + x2("\\w", "¤ª", 0, 2); + n("\\W", "¤¢"); + x2("[\\W]", "¤¦$", 2, 3); + x2("\\S", "¤½", 0, 2); + x2("\\S", "´Á", 0, 2); + x2("\\b", "µ¤ ", 0, 0); + x2("\\b", " ¤Û", 1, 1); + x2("\\B", "¤»¤½ ", 2, 2); + x2("\\B", "¤¦ ", 3, 3); + x2("\\B", " ¤¤", 0, 0); + x2("[¤¿¤Á]", "¤Á", 0, 2); + n("[¤Ê¤Ë]", "¤Ì"); + x2("[¤¦-¤ª]", "¤¨", 0, 2); + n("[^¤±]", "¤±"); + x2("[\\w]", "¤Í", 0, 2); + n("[\\d]", "¤Õ"); + x2("[\\D]", "¤Ï", 0, 2); + n("[\\s]", "¤¯"); + x2("[\\S]", "¤Ø", 0, 2); + x2("[\\w\\d]", "¤è", 0, 2); + x2("[\\w\\d]", " ¤è", 3, 5); + n("\\wµ´¼Ö", " µ´¼Ö"); + x2("µ´\\W¼Ö", "µ´ ¼Ö", 0, 5); + x2("¤¢.¤¤.¤¦", "¤¢¤¢¤¤¤¤¤¦", 0, 10); + x2(".\\w¤¦\\W..¤¾", "¤¨¤¦¤¦ ¤¦¤¾¤¾", 0, 13); + x2("\\s\\w¤³¤³¤³", " ¤³¤³¤³¤³", 0, 9); + x2("¤¢¤¢.¤±", "¤¢¤¢¤±¤±", 0, 8); + n(".¤¤", "¤¤¤¨"); + x2(".¤ª", "¤ª¤ª", 0, 4); + x2("^¤¢", "¤¢", 0, 2); + x2("^¤à$", "¤à", 0, 2); + x2("^\\w$", "¤Ë", 0, 2); + x2("^\\w¤«¤­¤¯¤±¤³$", "z¤«¤­¤¯¤±¤³", 0, 11); 
+ x2("^\\w...¤¦¤¨¤ª$", "z¤¢¤¤¤¦¤¦¤¨¤ª", 0, 13); + x2("\\w\\w\\s\\W¤ª¤ª¤ª\\d", "a¤ª ¤ª¤ª¤ª4", 0, 12); + x2("\\A¤¿¤Á¤Ä", "¤¿¤Á¤Ä", 0, 6); + x2("¤à¤á¤â\\Z", "¤à¤á¤â", 0, 6); + x2("¤«¤­¤¯\\z", "¤«¤­¤¯", 0, 6); + x2("¤«¤­¤¯\\Z", "¤«¤­¤¯\n", 0, 6); + x2("\\G¤Ý¤Ô", "¤Ý¤Ô", 0, 4); + n("\\G¤¨", "¤¦¤¨¤ª"); + n("¤È¤Æ\\G", "¤È¤Æ"); + n("¤Þ¤ß\\A", "¤Þ¤ß"); + n("¤Þ\\A¤ß", "¤Þ¤ß"); + x2("(?=¤»)¤»", "¤»", 0, 2); + n("(?=¤¦).", "¤¤"); + x2("(?!¤¦)¤«", "¤«", 0, 2); + n("(?!¤È)¤¢", "¤È"); + x2("(?i:¤¢)", "¤¢", 0, 2); + x2("(?i:¤Ö¤Ù)", "¤Ö¤Ù", 0, 4); + n("(?i:¤¤)", "¤¦"); + x2("(?m:¤è.)", "¤è\n", 0, 3); + x2("(?m:.¤á)", "¤Þ\n¤á", 2, 5); + x2("¤¢?", "", 0, 0); + x2("ÊÑ?", "²½", 0, 0); + x2("ÊÑ?", "ÊÑ", 0, 2); + x2("ÎÌ*", "", 0, 0); + x2("ÎÌ*", "ÎÌ", 0, 2); + x2("»Ò*", "»Ò»Ò»Ò", 0, 6); + x2("ÇÏ*", "¼¯ÇÏÇÏÇÏÇÏ", 0, 0); + n("»³+", ""); + x2("²Ï+", "²Ï", 0, 2); + x2("»þ+", "»þ»þ»þ»þ", 0, 8); + x2("¤¨+", "¤¨¤¨¤¦¤¦¤¦", 0, 4); + x2("¤¦+", "¤ª¤¦¤¦¤¦¤¦", 2, 10); + x2(".?", "¤¿", 0, 2); + x2(".*", "¤Ñ¤Ô¤×¤Ú", 0, 8); + x2(".+", "¤í", 0, 2); + x2(".+", "¤¤¤¦¤¨¤«\n", 0, 8); + x2("¤¢|¤¤", "¤¢", 0, 2); + x2("¤¢|¤¤", "¤¤", 0, 2); + x2("¤¢¤¤|¤¤¤¦", "¤¢¤¤", 0, 4); + x2("¤¢¤¤|¤¤¤¦", "¤¤¤¦", 0, 4); + x2("¤ò(?:¤«¤­|¤­¤¯)", "¤ò¤«¤­", 0, 6); + x2("¤ò(?:¤«¤­|¤­¤¯)¤±", "¤ò¤­¤¯¤±", 0, 8); + x2("¤¢¤¤|(?:¤¢¤¦|¤¢¤ò)", "¤¢¤ò", 0, 4); + x2("¤¢|¤¤|¤¦", "¤¨¤¦", 2, 4); + x2("¤¢|¤¤|¤¦¤¨|¤ª¤«¤­|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í", "¤·¤¹¤»", 0, 6); + n("¤¢|¤¤|¤¦¤¨|¤ª¤«¤­|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í", "¤¹¤»"); + x2("¤¢|^¤ï", "¤Ö¤¢", 2, 4); + x2("¤¢|^¤ò", "¤ò¤¢", 0, 2); + x2("µ´|\\G¼Ö", "¤±¼Öµ´", 4, 6); + x2("µ´|\\G¼Ö", "¼Öµ´", 0, 2); + x2("µ´|\\A¼Ö", "b¼Öµ´", 3, 5); + x2("µ´|\\A¼Ö", "¼Ö", 0, 2); + x2("µ´|¼Ö\\Z", "¼Öµ´", 2, 4); + x2("µ´|¼Ö\\Z", "¼Ö", 0, 2); + x2("µ´|¼Ö\\Z", "¼Ö\n", 0, 2); + x2("µ´|¼Ö\\z", "¼Öµ´", 2, 4); + x2("µ´|¼Ö\\z", "¼Ö", 0, 2); + x2("\\w|\\s", "¤ª", 0, 2); + x2("\\w|%", "%¤ª", 0, 1); + x2("\\w|[&$]", "¤¦&", 0, 2); + x2("[¤¤-¤±]", "¤¦", 0, 2); + x2("[¤¤-¤±]|[^¤«-¤³]", "¤¢", 0, 2); + 
x2("[¤¤-¤±]|[^¤«-¤³]", "¤«", 0, 2); + x2("[^¤¢]", "\n", 0, 1); + x2("(?:¤¢|[¤¦-¤­])|¤¤¤ò", "¤¦¤ò", 0, 2); + x2("(?:¤¢|[¤¦-¤­])|¤¤¤ò", "¤¤¤ò", 0, 4); + x2("¤¢¤¤¤¦|(?=¤±¤±)..¤Û", "¤±¤±¤Û", 0, 6); + x2("¤¢¤¤¤¦|(?!¤±¤±)..¤Û", "¤¢¤¤¤Û", 0, 6); + x2("(?=¤ò¤¢)..¤¢|(?=¤ò¤ò)..¤¢", "¤ò¤ò¤¢", 0, 6); + x2("(?<=¤¢|¤¤¤¦)¤¤", "¤¤¤¦¤¤", 4, 6); + n("(?>¤¢|¤¢¤¤¤¨)¤¦", "¤¢¤¤¤¨¤¦"); + x2("(?>¤¢¤¤¤¨|¤¢)¤¦", "¤¢¤¤¤¨¤¦", 0, 8); + x2("¤¢?|¤¤", "¤¢", 0, 2); + x2("¤¢?|¤¤", "¤¤", 0, 0); + x2("¤¢?|¤¤", "", 0, 0); + x2("¤¢*|¤¤", "¤¢¤¢", 0, 4); + x2("¤¢*|¤¤*", "¤¤¤¢", 0, 0); + x2("¤¢*|¤¤*", "¤¢¤¤", 0, 2); + x2("[a¤¢]*|¤¤*", "a¤¢¤¤¤¤¤¤", 0, 3); + x2("¤¢+|¤¤*", "", 0, 0); + x2("¤¢+|¤¤*", "¤¤¤¤¤¤", 0, 6); + x2("¤¢+|¤¤*", "¤¢¤¤¤¤¤¤", 0, 2); + x2("¤¢+|¤¤*", "a¤¢¤¤¤¤¤¤", 0, 0); + n("¤¢+|¤¤+", ""); + x2("(¤¢|¤¤)?", "¤¤", 0, 2); + x2("(¤¢|¤¤)*", "¤¤¤¢", 0, 4); + x2("(¤¢|¤¤)+", "¤¤¤¢¤¤", 0, 6); + x2("(¤¢¤¤|¤¦¤¢)+", "¤¦¤¢¤¢¤¤¤¦¤¨", 0, 8); + x2("(¤¢¤¤|¤¦¤¨)+", "¤¦¤¢¤¢¤¤¤¦¤¨", 4, 12); + x2("(¤¢¤¤|¤¦¤¢)+", "¤¢¤¢¤¤¤¦¤¢", 2, 10); + x2("(¤¢¤¤|¤¦¤¢)+", "¤¢¤¤¤ò¤¦¤¢", 0, 4); + x2("(¤¢¤¤|¤¦¤¢)+", "$$zzzz¤¢¤¤¤ò¤¦¤¢", 6, 10); + x2("(¤¢|¤¤¤¢¤¤)+", "¤¢¤¤¤¢¤¤¤¢", 0, 10); + x2("(¤¢|¤¤¤¢¤¤)+", "¤¤¤¢", 2, 4); + x2("(¤¢|¤¤¤¢¤¤)+", "¤¤¤¢¤¢¤¢¤¤¤¢", 2, 8); + x2("(?:¤¢|¤¤)(?:¤¢|¤¤)", "¤¢¤¤", 0, 4); + x2("(?:¤¢*|¤¤*)(?:¤¢*|¤¤*)", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 6); + x2("(?:¤¢*|¤¤*)(?:¤¢+|¤¤+)", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12); + x2("(?:¤¢+|¤¤+){2}", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12); + x2("(?:¤¢+|¤¤+){1,2}", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12); + x2("(?:¤¢+|\\A¤¤*)¤¦¤¦", "¤¦¤¦", 0, 4); + n("(?:¤¢+|\\A¤¤*)¤¦¤¦", "¤¢¤¤¤¦¤¦"); + x2("(?:^¤¢+|¤¤+)*¤¦", "¤¢¤¢¤¤¤¤¤¤¤¢¤¤¤¦", 12, 16); + x2("(?:^¤¢+|¤¤+)*¤¦", "¤¢¤¢¤¤¤¤¤¤¤¤¤¦", 0, 14); + x2("¤¦{0,}", "¤¦¤¦¤¦¤¦", 0, 8); + x2("¤¢|(?i)c", "C", 0, 1); + x2("(?i)c|¤¢", "C", 0, 1); + x2("(?i:¤¢)|a", "a", 0, 1); + n("(?i:¤¢)|a", "A"); + x2("[¤¢¤¤¤¦]?", "¤¢¤¤¤¦", 0, 2); + x2("[¤¢¤¤¤¦]*", "¤¢¤¤¤¦", 0, 6); + x2("[^¤¢¤¤¤¦]*", "¤¢¤¤¤¦", 0, 0); + n("[^¤¢¤¤¤¦]+", "¤¢¤¤¤¦"); + x2("¤¢??", "¤¢¤¢¤¢", 0, 0); + x2("¤¤¤¢??¤¤", "¤¤¤¢¤¤", 0, 6); + 
x2("¤¢*?", "¤¢¤¢¤¢", 0, 0); + x2("¤¤¤¢*?", "¤¤¤¢¤¢", 0, 2); + x2("¤¤¤¢*?¤¤", "¤¤¤¢¤¢¤¤", 0, 8); + x2("¤¢+?", "¤¢¤¢¤¢", 0, 2); + x2("¤¤¤¢+?", "¤¤¤¢¤¢", 0, 4); + x2("¤¤¤¢+?¤¤", "¤¤¤¢¤¢¤¤", 0, 8); + x2("(?:Å·?)??", "Å·", 0, 0); + x2("(?:Å·??)?", "Å·", 0, 0); + x2("(?:Ì´?)+?", "Ì´Ì´Ì´", 0, 2); + x2("(?:É÷+)??", "É÷É÷É÷", 0, 0); + x2("(?:Àã+)??Áú", "ÀãÀãÀãÁú", 0, 8); + x2("(?:¤¢¤¤)?{2}", "", 0, 0); + x2("(?:µ´¼Ö)?{2}", "µ´¼Öµ´¼Öµ´", 0, 8); + x2("(?:µ´¼Ö)*{0}", "µ´¼Öµ´¼Öµ´", 0, 0); + x2("(?:µ´¼Ö){3,}", "µ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 16); + n("(?:µ´¼Ö){3,}", "µ´¼Öµ´¼Ö"); + x2("(?:µ´¼Ö){2,4}", "µ´¼Öµ´¼Öµ´¼Ö", 0, 12); + x2("(?:µ´¼Ö){2,4}", "µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 16); + x2("(?:µ´¼Ö){2,4}?", "µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 8); + x2("(?:µ´¼Ö){,}", "µ´¼Ö{,}", 0, 7); + x2("(?:¤«¤­¤¯)+?{2}", "¤«¤­¤¯¤«¤­¤¯¤«¤­¤¯", 0, 12); + x3("(²Ð)", "²Ð", 0, 2, 1); + x3("(²Ð¿å)", "²Ð¿å", 0, 4, 1); + x2("((»þ´Ö))", "»þ´Ö", 0, 4); + x3("((É÷¿å))", "É÷¿å", 0, 4, 1); + x3("((ºòÆü))", "ºòÆü", 0, 4, 2); + x3("((((((((((((((((((((ÎÌ»Ò))))))))))))))))))))", "ÎÌ»Ò", 0, 4, 20); + x3("(¤¢¤¤)(¤¦¤¨)", "¤¢¤¤¤¦¤¨", 0, 4, 1); + x3("(¤¢¤¤)(¤¦¤¨)", "¤¢¤¤¤¦¤¨", 4, 8, 2); + x3("()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤­¤¯¤±¤³", "¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³", 6, 12, 3); + x3("(()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤­¤¯¤±¤³)", "¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³", 6, 12, 4); + x3(".*(¥Õ¥©)¥ó¡¦¥Þ(¥ó()¥·¥å¥¿)¥¤¥ó", "¥Õ¥©¥ó¡¦¥Þ¥ó¥·¥å¥¿¥¤¥ó", 10, 18, 2); + x2("(^¤¢)", "¤¢", 0, 2); + x3("(¤¢)|(¤¢)", "¤¤¤¢", 2, 4, 1); + x3("(^¤¢)|(¤¢)", "¤¤¤¢", 2, 4, 2); + x3("(¤¢?)", "¤¢¤¢¤¢", 0, 2, 1); + x3("(¤Þ*)", "¤Þ¤Þ¤Þ", 0, 6, 1); + x3("(¤È*)", "", 0, 0, 1); + x3("(¤ë+)", "¤ë¤ë¤ë¤ë¤ë¤ë¤ë", 0, 14, 1); + x3("(¤Õ+|¤Ø*)", "¤Õ¤Õ¤Õ¤Ø¤Ø", 0, 6, 1); + x3("(¤¢+|¤¤?)", "¤¤¤¤¤¤¤¢¤¢", 0, 2, 1); + x3("(¤¢¤¤¤¦)?", "¤¢¤¤¤¦", 0, 6, 1); + x3("(¤¢¤¤¤¦)*", "¤¢¤¤¤¦", 0, 6, 1); + x3("(¤¢¤¤¤¦)+", "¤¢¤¤¤¦", 0, 6, 1); + x3("(¤µ¤·¤¹|¤¢¤¤¤¦)+", "¤¢¤¤¤¦", 0, 6, 1); + x3("([¤Ê¤Ë¤Ì][¤«¤­¤¯]|¤«¤­¤¯)+", "¤«¤­¤¯", 0, 6, 1); + x3("((?i:¤¢¤¤¤¦))", "¤¢¤¤¤¦", 0, 6, 1); + x3("((?m:¤¢.¤¦))", "¤¢\n¤¦", 0, 5, 1); + x3("((?=¤¢¤ó)¤¢)", 
"¤¢¤ó¤¤", 0, 2, 1); + x3("¤¢¤¤¤¦|(.¤¢¤¤¤¨)", "¤ó¤¢¤¤¤¨", 0, 8, 1); + x3("¤¢*(.)", "¤¢¤¢¤¢¤¢¤ó", 8, 10, 1); + x3("¤¢*?(.)", "¤¢¤¢¤¢¤¢¤ó", 0, 2, 1); + x3("¤¢*?(¤ó)", "¤¢¤¢¤¢¤¢¤ó", 8, 10, 1); + x3("[¤¤¤¦¤¨]¤¢*(.)", "¤¨¤¢¤¢¤¢¤¢¤ó", 10, 12, 1); + x3("(\\A¤¤¤¤)¤¦¤¦", "¤¤¤¤¤¦¤¦", 0, 4, 1); + n("(\\A¤¤¤¤)¤¦¤¦", "¤ó¤¤¤¤¤¦¤¦"); + x3("(^¤¤¤¤)¤¦¤¦", "¤¤¤¤¤¦¤¦", 0, 4, 1); + n("(^¤¤¤¤)¤¦¤¦", "¤ó¤¤¤¤¤¦¤¦"); + x3("¤í¤í(¤ë¤ë$)", "¤í¤í¤ë¤ë", 4, 8, 1); + n("¤í¤í(¤ë¤ë$)", "¤í¤í¤ë¤ë¤ë"); + x2("(̵)\\1", "̵̵", 0, 4); + n("(̵)\\1", "̵Éð"); + x2("(¶õ?)\\1", "¶õ¶õ", 0, 4); + x2("(¶õ??)\\1", "¶õ¶õ", 0, 0); + x2("(¶õ*)\\1", "¶õ¶õ¶õ¶õ¶õ", 0, 8); + x3("(¶õ*)\\1", "¶õ¶õ¶õ¶õ¶õ", 0, 4, 1); + x2("¤¢(¤¤*)\\1", "¤¢¤¤¤¤¤¤¤¤", 0, 10); + x2("¤¢(¤¤*)\\1", "¤¢¤¤", 0, 2); + x2("(¤¢*)(¤¤*)\\1\\2", "¤¢¤¢¤¢¤¤¤¤¤¢¤¢¤¢¤¤¤¤", 0, 20); + x2("(¤¢*)(¤¤*)\\2", "¤¢¤¢¤¢¤¤¤¤¤¤¤¤", 0, 14); + x3("(¤¢*)(¤¤*)\\2", "¤¢¤¢¤¢¤¤¤¤¤¤¤¤", 6, 10, 2); + x2("(((((((¤Ý*)¤Ú))))))¤Ô\\7", "¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý", 0, 16); + x3("(((((((¤Ý*)¤Ú))))))¤Ô\\7", "¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý", 0, 6, 7); + x2("(¤Ï)(¤Ò)(¤Õ)\\2\\1\\3", "¤Ï¤Ò¤Õ¤Ò¤Ï¤Õ", 0, 12); + x2("([¤­-¤±])\\1", "¤¯¤¯", 0, 4); + x2("(\\w\\d\\s)\\1", "¤¢5 ¤¢5 ", 0, 8); + n("(\\w\\d\\s)\\1", "¤¢5 ¤¢5"); + x2("(ï¡©|[¤¢-¤¦]{3})\\1", "ï¡©", 0, 8); + x2("...(ï¡©|[¤¢-¤¦]{3})\\1", "¤¢a¤¢Ã¯¡©Ã¯¡©", 0, 13); + x2("(ï¡©|[¤¢-¤¦]{3})\\1", "¤¦¤¤¤¦¤¦¤¤¤¦", 0, 12); + x2("(^¤³)\\1", "¤³¤³", 0, 4); + n("(^¤à)\\1", "¤á¤à¤à"); + n("(¤¢$)\\1", "¤¢¤¢"); + n("(¤¢¤¤\\Z)\\1", "¤¢¤¤"); + x2("(¤¢*\\Z)\\1", "¤¢", 2, 2); + x2(".(¤¢*\\Z)\\1", "¤¤¤¢", 2, 4); + x3("(.(¤ä¤¤¤æ)\\2)", "z¤ä¤¤¤æ¤ä¤¤¤æ", 0, 13, 1); + x3("(.(..\\d.)\\2)", "¤¢12341234", 0, 10, 1); + x2("((?i:¤¢v¤º))\\1", "¤¢v¤º¤¢v¤º", 0, 10); + x2("(?<¶ò¤«>ÊÑ|\\(\\g<¶ò¤«>\\))", "((((((ÊÑ))))))", 0, 14); + x2("\\A(?:\\g<°¤_1>|\\g<±¾_2>|\\z½ªÎ» (?<°¤_1>´Ñ|¼«\\g<±¾_2>¼«)(?<±¾_2>ºß|Êî»§\\g<°¤_1>Êî»§))$", "Êî»§¼«Êî»§¼«ºß¼«Êî»§¼«Êî»§", 0, 26); + x2("[[¤Ò¤Õ]]", "¤Õ", 0, 2); + x2("[[¤¤¤ª¤¦]¤«]", "¤«", 0, 2); + n("[[^¤¢]]", "¤¢"); + n("[^[¤¢]]", "¤¢"); + x2("[^[^¤¢]]", "¤¢", 0, 
2); + x2("[[¤«¤­¤¯]&&¤­¤¯]", "¤¯", 0, 2); + n("[[¤«¤­¤¯]&&¤­¤¯]", "¤«"); + n("[[¤«¤­¤¯]&&¤­¤¯]", "¤±"); + x2("[¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]", "¤ñ", 0, 2); + n("[^¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]", "¤ñ"); + x2("[[^¤¢&&¤¢]&&¤¢-¤ó]", "¤¤", 0, 2); + n("[[^¤¢&&¤¢]&&¤¢-¤ó]", "¤¢"); + x2("[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]", "¤­", 0, 2); + n("[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]", "¤¤"); + x2("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤¦", 0, 2); + x2("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤¨", 0, 2); + n("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤«"); + x2("[¤¢-&&-¤¢]", "-", 0, 1); + x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]q-w]", "¤¨", 0, 2); + x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "f", 0, 1); + x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "g", 0, 1); + n("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "2"); + fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d\n", + nsucc, nfail, nerror); #ifndef POSIX_TEST - regex_region_free(region, 1); - regex_end(); + onig_region_free(region, 1); + onig_end(); #endif return 0; diff --git a/ext/mbstring/oniguruma/testconv.rb b/ext/mbstring/oniguruma/testconv.rb deleted file mode 100644 index afaa673d90..0000000000 --- a/ext/mbstring/oniguruma/testconv.rb +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/local/bin/ruby -Ke -# testconv.rb -# Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp) - -WINDOWS = (ARGV.size > 0 && /^-win/i =~ ARGV[0]) -ARGV.shift if WINDOWS - -if WINDOWS - REGCODE = 'REGCODE_SJIS' - REGENC = 'REG_ENCODING_SJIS' -else - REGCODE = 'REGCODE_EUCJP' - REGENC = 'REG_ENCODING_EUC_JP' -end - -def conv_reg(s) - s = s.gsub(/\\/, '\\\\\\\\') #' - if (WINDOWS) - s = s.gsub(/\?\?/, '?\\\\?') # escape ANSI trigraph - end - s -end - -def conv_str(s) - if (s[0] == ?') - s = s[1..-2] - return s.gsub(/\\/, '\\\\\\\\') #' - else - return s[1..-2] - end -end - -print(<<"EOS") -/* - * This program was generated by testconv.rb. 
- */ -#include <stdio.h> - -#ifdef POSIX_TEST -#include "onigposix.h" -#else -#include "oniguruma.h" -#endif - -static int nsucc = 0; -static int nfail = 0; - -#ifndef POSIX_TEST -static RegRegion* region; -#endif - -static void xx(char* pattern, char* str, int from, int to, int mem, int not) -{ - int r; - -#ifdef POSIX_TEST - regex_t reg; - char buf[200]; - regmatch_t pmatch[20]; - - r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE); - if (r) { - regerror(r, &reg, buf, sizeof(buf)); - fprintf(stderr, "ERROR: %s\\n", buf); - exit(-1); - } - - r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0); - if (r != 0 && r != REG_NOMATCH) { - regerror(r, &reg, buf, sizeof(buf)); - fprintf(stderr, "ERROR: %s\\n", buf); - exit(-1); - } - - if (r == REG_NOMATCH) { - if (not) { - fprintf(stdout, "OK(N): /%s/ '%s'\\n", pattern, str); - nsucc++; - } - else { - fprintf(stdout, "FAIL: /%s/ '%s'\\n", pattern, str); - nfail++; - } - } - else { - if (not) { - fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", pattern, str); - nfail++; - } - else { - if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { - fprintf(stdout, "OK: /%s/ '%s'\\n", pattern, str); - nsucc++; - } - else { - fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", pattern, str, - from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); - nfail++; - } - } - } - regfree(&reg); - -#else - regex_t* reg; - RegErrorInfo einfo; - - r = regex_new(&reg, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)), - REG_OPTION_DEFAULT, #{REGCODE}, REG_SYNTAX_DEFAULT, &einfo); - if (r) { - char s[REG_MAX_ERROR_MESSAGE_LEN]; - regex_error_code_to_str(s, r, &einfo); - fprintf(stderr, "ERROR: %s\\n", s); - exit(-1); - } - - r = regex_search(reg, (UChar* )str, (UChar* )(str + strlen(str)), - (UChar* )str, (UChar* )(str + strlen(str)), - region, REG_OPTION_NONE); - if (r < REG_MISMATCH) { - char s[REG_MAX_ERROR_MESSAGE_LEN]; - regex_error_code_to_str(s, r); - fprintf(stderr, "ERROR: %s\\n", s); - exit(-1); - } - - if (r == REG_MISMATCH) { - if (not) { - fprintf(stdout, 
"OK(N): /%s/ '%s'\\n", pattern, str); - nsucc++; - } - else { - fprintf(stdout, "FAIL: /%s/ '%s'\\n", pattern, str); - nfail++; - } - } - else { - if (not) { - fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", pattern, str); - nfail++; - } - else { - if (region->beg[mem] == from && region->end[mem] == to) { - fprintf(stdout, "OK: /%s/ '%s'\\n", pattern, str); - nsucc++; - } - else { - fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", pattern, str, - from, to, region->beg[mem], region->end[mem]); - nfail++; - } - } - } - regex_free(reg); -#endif -} - -static void x2(char* pattern, char* str, int from, int to) -{ - xx(pattern, str, from, to, 0, 0); -} - -static void x3(char* pattern, char* str, int from, int to, int mem) -{ - xx(pattern, str, from, to, mem, 0); -} - -static void n(char* pattern, char* str) -{ - xx(pattern, str, 0, 0, 0, 1); -} - -extern int main(int argc, char* argv[]) -{ -#ifdef POSIX_TEST - reg_set_encoding(#{REGENC}); -#else - region = regex_region_new(); -#endif - -EOS - -CM = '\s*,\s*' -RX2 = %r{^x\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)$} -RI2 = %r{^i\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)} -RX3 = %r{^x\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)#{CM}(\S+)\)$} -RN = %r{^n\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")\)$} #' - -while line = gets() - if (m = RX2.match(line)) - reg = conv_reg(m[1]) - str = conv_str(m[2]) - printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, m[3], m[4]) - elsif (m = RI2.match(line)) - reg = conv_reg(m[1]) - str = conv_str(m[2]) - printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, m[3], m[4]) - elsif (m = RX3.match(line)) - reg = conv_reg(m[1]) - str = conv_str(m[2]) - printf(" x3(\"%s\", \"%s\", %s, %s, %s);\n", reg, str, m[3], m[4], m[5]) - elsif (m = RN.match(line)) - reg = conv_reg(m[1]) - str = conv_str(m[2]) - printf(" n(\"%s\", \"%s\");\n", reg, str) - else - - end -end - -print(<<'EOS') - fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d\n", nsucc, nfail); - -#ifndef POSIX_TEST - 
regex_region_free(region, 1); - regex_end(); -#endif - - return 0; -} -EOS - -# END OF SCRIPT diff --git a/ext/mbstring/oniguruma/win32/Makefile b/ext/mbstring/oniguruma/win32/Makefile deleted file mode 100644 index bb20474e8f..0000000000 --- a/ext/mbstring/oniguruma/win32/Makefile +++ /dev/null @@ -1,131 +0,0 @@ -# Oniguruma Makefile for Win32 - -product_name = oniguruma - -CPPFLAGS = -CFLAGS = -O2 -nologo -LDFLAGS = -LOADLIBES = -ARLIB = lib -ARLIB_FLAGS = -nologo -ARDLL = cl -ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll -LINKFLAGS = -link -incremental:no -pdb:none - -INSTALL = install -c -CP = copy -CC = cl -DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT -RUBYDIR = .. - -subdirs = - -libbase = onig -libname = $(libbase)_s.lib -dllname = $(libbase).dll -dlllib = $(libbase).lib - -onigheaders = oniguruma.h regint.h regparse.h -posixheaders = onigposix.h -headers = $(posixheaders) $(onigheaders) - -onigobjs = reggnu.obj regerror.obj regparse.obj regcomp.obj regexec.obj -posixobjs = regposix.obj regposerr.obj -libobjs = $(onigobjs) $(posixobjs) - -onigsources = regerror.c regparse.c regcomp.c regexec.c reggnu.c -posixsources = regposix.c regposerr.c -libsources = $(posixsources) $(onigsources) -rubysources = regex.c $(onigsources) - -patchfiles = re.c.168.patch re.c.180.patch -distfiles = README COPYING INSTALL-RUBY HISTORY \ - Makefile.in configure.in config.h.in configure \ - $(headers) $(libsources) regex.c $(patchfiles) \ - test.rb testconv.rb -testc = testc -testp = testp - -makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' - -.SUFFIXES: -.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo - -.c.obj: - $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. 
/c $< - -# targets -default: all - -all: $(libname) $(dllname) - -$(libname): $(libobjs) - $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) - -$(dllname): $(libobjs) - $(ARDLL) $(libobjs) -Fe$@ $(ARDLL_FLAGS) - -regparse.obj: regparse.c $(onigheaders) config.h -regcomp.obj: regcomp.c $(onigheaders) config.h -regexec.obj: regexec.c regint.h oniguruma.h config.h -reggnu.obj: reggnu.c regint.h oniguruma.h config.h -regerror.obj: regerror.c regint.h oniguruma.h config.h -regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h -regposerr.obj: regposerr.c $(posixheaders) config.h - -# Ruby test -rtest: - $(RUBYDIR)\win32\ruby -w -Ke test.rb - -# C library test -ctest: $(testc) - .\$(testc) - -# POSIX C library test -ptest: $(testp) - .\$(testp) - -$(testc): $(testc).c $(libname) - $(CC) -nologo -o $(testc) $(testc).c $(libname) - -$(testp): $(testc).c $(dlllib) - $(CC) -nologo -DPOSIX_TEST -DIMPORT -o $(testp) $(testc).c $(dlllib) - -clean: - del *.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj - - -16: cpruby - patch -d $(RUBYDIR) -p0 < re.c.168.patch - -18: cpruby - patch -d $(RUBYDIR) -p0 < re.c.180.patch - -# backup file suffix -SORIG = ruby_orig - -cpruby: - $(CP) $(RUBYDIR)\regex.c $(RUBYDIR)\regex.c.$(SORIG) - $(CP) $(RUBYDIR)\regex.h $(RUBYDIR)\regex.h.$(SORIG) - $(CP) $(RUBYDIR)\re.c $(RUBYDIR)\re.c.$(SORIG) -# $(rubysources) - $(CP) regex.c $(RUBYDIR) - $(CP) regerror.c $(RUBYDIR) - $(CP) regparse.c $(RUBYDIR) - $(CP) regcomp.c $(RUBYDIR) - $(CP) regexec.c $(RUBYDIR) - $(CP) reggnu.c $(RUBYDIR) -# $(onigheaders) - $(CP) oniguruma.h $(RUBYDIR)\regex.h - $(CP) regint.h $(RUBYDIR) - $(CP) regparse.h $(RUBYDIR) - -rback: - $(CP) $(RUBYDIR)\regex.c.$(SORIG) $(RUBYDIR)\regex.c - $(CP) $(RUBYDIR)\regex.h.$(SORIG) $(RUBYDIR)\regex.h - $(CP) $(RUBYDIR)\re.c.$(SORIG) $(RUBYDIR)\re.c - -samples: - $(CC) $(CFLAGS) -I. -DIMPORT -o simple sample\simple.c $(dlllib) - $(CC) $(CFLAGS) -I. -DIMPORT -o posix sample\posix.c $(dlllib) - $(CC) $(CFLAGS) -I. 
-DIMPORT -o names sample\names.c $(dlllib) diff --git a/ext/mbstring/oniguruma/win32/config.h b/ext/mbstring/oniguruma/win32/config.h deleted file mode 100644 index bdbdaf25c1..0000000000 --- a/ext/mbstring/oniguruma/win32/config.h +++ /dev/null @@ -1,84 +0,0 @@ -#define STDC_HEADERS 1 -#define HAVE_SYS_TYPES_H 1 -#define HAVE_SYS_STAT_H 1 -#define HAVE_STDLIB_H 1 -#define HAVE_STRING_H 1 -#define HAVE_MEMORY_H 1 -#define HAVE_FLOAT_H 1 -#define HAVE_OFF_T 1 -#define SIZEOF_INT 4 -#define SIZEOF_SHORT 2 -#define SIZEOF_LONG 4 -#define SIZEOF_LONG_LONG 0 -#define SIZEOF___INT64 8 -#define SIZEOF_OFF_T 4 -#define SIZEOF_VOIDP 4 -#define SIZEOF_FLOAT 4 -#define SIZEOF_DOUBLE 8 -#define HAVE_PROTOTYPES 1 -#define TOKEN_PASTE(x,y) x##y -#define HAVE_STDARG_PROTOTYPES 1 -#ifndef NORETURN -#if _MSC_VER > 1100 -#define NORETURN(x) __declspec(noreturn) x -#else -#define NORETURN(x) x -#endif -#endif -#define HAVE_DECL_SYS_NERR 1 -#define STDC_HEADERS 1 -#define HAVE_STDLIB_H 1 -#define HAVE_STRING_H 1 -#define HAVE_LIMITS_H 1 -#define HAVE_FCNTL_H 1 -#define HAVE_SYS_UTIME_H 1 -#define HAVE_MEMORY_H 1 -#define uid_t int -#define gid_t int -#define HAVE_STRUCT_STAT_ST_RDEV 1 -#define HAVE_ST_RDEV 1 -#define GETGROUPS_T int -#define RETSIGTYPE void -#define HAVE_ALLOCA 1 -#define HAVE_DUP2 1 -#define HAVE_MEMCMP 1 -#define HAVE_MEMMOVE 1 -#define HAVE_MKDIR 1 -#define HAVE_STRCASECMP 1 -#define HAVE_STRNCASECMP 1 -#define HAVE_STRERROR 1 -#define HAVE_STRFTIME 1 -#define HAVE_STRCHR 1 -#define HAVE_STRSTR 1 -#define HAVE_STRTOD 1 -#define HAVE_STRTOL 1 -#define HAVE_STRTOUL 1 -#define HAVE_FLOCK 1 -#define HAVE_VSNPRINTF 1 -#define HAVE_FINITE 1 -#define HAVE_FMOD 1 -#define HAVE_FREXP 1 -#define HAVE_HYPOT 1 -#define HAVE_MODF 1 -#define HAVE_WAITPID 1 -#define HAVE_CHSIZE 1 -#define HAVE_TIMES 1 -#define HAVE__SETJMP 1 -#define HAVE_TELLDIR 1 -#define HAVE_SEEKDIR 1 -#define HAVE_MKTIME 1 -#define HAVE_COSH 1 -#define HAVE_SINH 1 -#define HAVE_TANH 1 -#define HAVE_EXECVE 1 
-#define HAVE_TZNAME 1 -#define HAVE_DAYLIGHT 1 -#define SETPGRP_VOID 1 -#define inline __inline -#define NEED_IO_SEEK_BETWEEN_RW 1 -#define RSHIFT(x,y) ((x)>>(int)y) -#define FILE_COUNT _cnt -#define FILE_READPTR _ptr -#define DEFAULT_KCODE KCODE_NONE -#define DLEXT ".so" -#define DLEXT2 ".dll" diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index cf9afa8082..b1543f2909 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -37,22 +37,22 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring) /* {{{ static void php_mb_regex_free_cache() */ static void php_mb_regex_free_cache(php_mb_regex_t **pre) { - php_mb_regex_free(*pre); + onig_free(*pre); } /* }}} */ /* {{{ _php_mb_regex_globals_ctor */ void _php_mb_regex_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC) { - MBSTRG(default_mbctype) = REGCODE_EUCJP; - MBSTRG(current_mbctype) = REGCODE_EUCJP; + MBSTRG(default_mbctype) = ONIG_ENCODING_EUC_JP; + MBSTRG(current_mbctype) = ONIG_ENCODING_EUC_JP; zend_hash_init(&(MBSTRG(ht_rc)), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1); MBSTRG(search_str) = (zval*) NULL; MBSTRG(search_re) = (php_mb_regex_t*)NULL; MBSTRG(search_pos) = 0; - MBSTRG(search_regs) = (php_mb_reg_region*)NULL; - MBSTRG(regex_default_options) = RE_OPTION_POSIXLINE; - MBSTRG(regex_default_syntax) = REG_SYNTAX_RUBY; + MBSTRG(search_regs) = (OnigRegion*)NULL; + MBSTRG(regex_default_options) = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; + MBSTRG(regex_default_syntax) = ONIG_SYNTAX_RUBY; } /* }}} */ @@ -96,8 +96,8 @@ PHP_RSHUTDOWN_FUNCTION(mb_regex) MBSTRG(search_pos) = 0; if (MBSTRG(search_regs) != NULL) { - php_mb_regex_region_free(MBSTRG(search_regs), 1); - MBSTRG(search_regs) = (php_mb_reg_region *)NULL; + onig_region_free(MBSTRG(search_regs), 1); + MBSTRG(search_regs) = (OnigRegion *)NULL; } zend_hash_clean(&MBSTRG(ht_rc)); @@ -108,53 +108,152 @@ PHP_RSHUTDOWN_FUNCTION(mb_regex) /* * encoding name resolver */ + +/* {{{ encoding name map */ +typedef struct 
_php_mb_regex_enc_name_map_t { + const char *names; + OnigEncoding code; +} php_mb_regex_enc_name_map_t; + +php_mb_regex_enc_name_map_t enc_name_map[] ={ + { + "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0", + ONIG_ENCODING_EUC_JP + }, + { + "UTF-8\0UTF8\0", + ONIG_ENCODING_UTF8 + }, + { + "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0", + ONIG_ENCODING_SJIS + }, + { + "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0", + ONIG_ENCODING_BIG5 + }, + { + "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0", + ONIG_ENCODING_EUC_CN + }, + { + "EUC-TW\0EUCTW\0EUC_TW\0", + ONIG_ENCODING_EUC_TW + }, + { + "EUC-KR\0EUCKR\0EUC_KR\0", + ONIG_ENCODING_EUC_KR + }, + { + "KOI8\0KOI-8\0", + ONIG_ENCODING_KOI8 + }, + { + "KOI8R\0KOI8-R\0KOI-8R\0", + ONIG_ENCODING_KOI8_R + }, + { + "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0", + ONIG_ENCODING_ISO_8859_1 + }, + { + "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0", + ONIG_ENCODING_ISO_8859_2 + }, + { + "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0", + ONIG_ENCODING_ISO_8859_3 + }, + { + "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0", + ONIG_ENCODING_ISO_8859_4 + }, + { + "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0", + ONIG_ENCODING_ISO_8859_5 + }, + { + "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0", + ONIG_ENCODING_ISO_8859_6 + }, + { + "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0", + ONIG_ENCODING_ISO_8859_7 + }, + { + "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0", + ONIG_ENCODING_ISO_8859_8 + }, + { + "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0", + ONIG_ENCODING_ISO_8859_9 + }, + { + "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0", + ONIG_ENCODING_ISO_8859_10 + }, + { + "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0", + ONIG_ENCODING_ISO_8859_11 + }, + { + "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0", + ONIG_ENCODING_ISO_8859_13 + }, + { + "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0", + ONIG_ENCODING_ISO_8859_14 + }, + { + "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0", + 
ONIG_ENCODING_ISO_8859_15 + }, + { + "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0", + ONIG_ENCODING_ISO_8859_16 + }, + { + "ASCII\0US-ASCII\0US_ASCII\0ISO646\0", + ONIG_ENCODING_ASCII + }, + { NULL, ONIG_ENCODING_UNDEF } +}; +/* }}} */ + /* {{{ php_mb_regex_name2mbctype */ -php_mb_reg_char_encoding php_mb_regex_name2mbctype(const char *pname) +OnigEncoding php_mb_regex_name2mbctype(const char *pname) { - php_mb_reg_char_encoding mbctype; - - mbctype = REGCODE_UNDEF; - if (pname != NULL) { - if (strcasecmp("EUC-JP", pname) == 0 - || strcasecmp("X-EUC-JP", pname) == 0 - || strcasecmp("UJIS", pname) == 0 - || strcasecmp("EUCJP", pname) == 0 - || strcasecmp("EUC_JP", pname) == 0 - || strcasecmp("EUCJP-WIN", pname) == 0) { - mbctype = REGCODE_EUCJP; - } else if (strcasecmp("UTF-8", pname) == 0 - || strcasecmp("UTF8", pname) == 0) { - mbctype = REGCODE_UTF8; - } else if (strcasecmp("SJIS", pname) == 0 - || strcasecmp("CP932", pname) == 0 - || strcasecmp("MS932", pname) == 0 - || strcasecmp("SHIFT_JIS", pname) == 0 - || strcasecmp("SJIS-WIN", pname) == 0) { - mbctype = REGCODE_SJIS; - } else if (strcasecmp("ASCII", pname) == 0) { - mbctype = REGCODE_ASCII; + const char *p; + php_mb_regex_enc_name_map_t *mapping; + + if (pname == NULL) { + return ONIG_ENCODING_UNDEF; + } + + for (mapping = enc_name_map; mapping->names != NULL; mapping++) { + for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) { + if (strcasecmp(p, pname) == 0) { + return mapping->code; + } } } - return mbctype; + return ONIG_ENCODING_UNDEF; } /* }}} */ /* {{{ php_mbregex_mbctype2name */ -const char *php_mb_regex_mbctype2name(php_mb_reg_char_encoding mbctype) +const char *php_mb_regex_mbctype2name(OnigEncoding mbctype) { - const char *p = NULL; - - if (mbctype == REGCODE_EUCJP) { - p = "EUC-JP"; - } else if(mbctype == REGCODE_UTF8) { - p = "UTF-8"; - } else if(mbctype == REGCODE_SJIS) { - p = "SJIS"; - } else if(mbctype == REGCODE_ASCII) { - p = "ascii"; + php_mb_regex_enc_name_map_t *mapping; 
+ + for (mapping = enc_name_map; mapping->names != NULL; mapping++) { + if (mapping->code == mbctype) { + return mapping->names; + } } - return p; + + return NULL; } /* }}} */ @@ -162,18 +261,18 @@ const char *php_mb_regex_mbctype2name(php_mb_reg_char_encoding mbctype) * regex cache */ /* {{{ php_mbregex_compile_pattern */ -static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, php_mb_reg_option_type options, php_mb_reg_char_encoding enc, php_mb_reg_syntax_type *syntax TSRMLS_DC) +static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC) { int err_code = 0; int found = 0; php_mb_regex_t *retval = NULL, **rc = NULL; - php_mb_reg_error_info err_info; - UChar err_str[REG_MAX_ERROR_MESSAGE_LEN]; + OnigErrorInfo err_info; + UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; found = zend_hash_find(&MBSTRG(ht_rc), (char *)pattern, patlen+1, (void **) &rc); if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) { - if ((err_code = php_mb_regex_new(&retval, (UChar *)pattern, (UChar *)(pattern + patlen), options, enc, syntax, &err_info)) != REG_NORMAL) { - php_mb_regex_error_code_to_str(err_str, err_code, err_info); + if ((err_code = onig_new(&retval, (UChar *)pattern, (UChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) { + onig_error_code_to_str(err_str, err_code, err_info); php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str); retval = NULL; goto out; @@ -188,14 +287,14 @@ out: /* }}} */ /* {{{ _php_mb_regex_get_option_string */ -static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_option_type option, php_mb_reg_syntax_type *syntax) +static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax) { size_t len_left = len; size_t len_req = 0; char *p = str; char c; - if ((option & 
RE_OPTION_IGNORECASE) != 0) { + if ((option & ONIG_OPTION_IGNORECASE) != 0) { if (len_left > 0) { --len_left; *(p++) = 'i'; @@ -203,7 +302,7 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_ ++len_req; } - if ((option & RE_OPTION_EXTENDED) != 0) { + if ((option & ONIG_OPTION_EXTEND) != 0) { if (len_left > 0) { --len_left; *(p++) = 'x'; @@ -211,14 +310,15 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_ ++len_req; } - if ((option & RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) { + if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) == + (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) { if (len_left > 0) { --len_left; *(p++) = 'p'; } ++len_req; } else { - if ((option & RE_OPTION_MULTILINE) != 0) { + if ((option & ONIG_OPTION_MULTILINE) != 0) { if (len_left > 0) { --len_left; *(p++) = 'm'; @@ -226,7 +326,7 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_ ++len_req; } - if ((option & RE_OPTION_SINGLELINE) != 0) { + if ((option & ONIG_OPTION_SINGLELINE) != 0) { if (len_left > 0) { --len_left; *(p++) = 's'; @@ -234,14 +334,14 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_ ++len_req; } } - if ((option & RE_OPTION_LONGEST) != 0) { + if ((option & ONIG_OPTION_FIND_LONGEST) != 0) { if (len_left > 0) { --len_left; *(p++) = 'l'; } ++len_req; } - if ((option & REG_OPTION_FIND_NOT_EMPTY) != 0) { + if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) { if (len_left > 0) { --len_left; *(p++) = 'n'; @@ -251,21 +351,21 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_ c = 0; - if (syntax == REG_SYNTAX_JAVA) { + if (syntax == ONIG_SYNTAX_JAVA) { c = 'j'; - } else if (syntax == REG_SYNTAX_GNU_REGEX) { + } else if (syntax == ONIG_SYNTAX_GNU_REGEX) { c = 'u'; - } else if (syntax == REG_SYNTAX_GREP) { + } else if (syntax == ONIG_SYNTAX_GREP) { c = 'g'; - } else if (syntax == REG_SYNTAX_EMACS) { + } else if 
(syntax == ONIG_SYNTAX_EMACS) { c = 'c'; - } else if (syntax == REG_SYNTAX_RUBY) { + } else if (syntax == ONIG_SYNTAX_RUBY) { c = 'r'; - } else if (syntax == REG_SYNTAX_PERL) { + } else if (syntax == ONIG_SYNTAX_PERL) { c = 'z'; - } else if (syntax == REG_SYNTAX_POSIX_BASIC) { + } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) { c = 'b'; - } else if (syntax == REG_SYNTAX_POSIX_EXTENDED) { + } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) { c = 'd'; } @@ -293,13 +393,13 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_ /* {{{ _php_mb_regex_init_options */ static void -_php_mb_regex_init_options(const char *parg, int narg, php_mb_reg_option_type *option, php_mb_reg_syntax_type **syntax, int *eval) +_php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval) { int n; char c; int optm = 0; - *syntax = REG_SYNTAX_RUBY; + *syntax = ONIG_SYNTAX_RUBY; if (parg != NULL) { n = 0; @@ -307,49 +407,49 @@ _php_mb_regex_init_options(const char *parg, int narg, php_mb_reg_option_type *o c = parg[n++]; switch (c) { case 'i': - optm |= RE_OPTION_IGNORECASE; + optm |= ONIG_OPTION_IGNORECASE; break; case 'x': - optm |= RE_OPTION_EXTENDED; + optm |= ONIG_OPTION_EXTEND; break; case 'm': - optm |= RE_OPTION_MULTILINE; + optm |= ONIG_OPTION_MULTILINE; break; case 's': - optm |= RE_OPTION_SINGLELINE; + optm |= ONIG_OPTION_SINGLELINE; break; case 'p': - optm |= RE_OPTION_POSIXLINE; + optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; break; case 'l': - optm |= RE_OPTION_LONGEST; + optm |= ONIG_OPTION_FIND_LONGEST; break; case 'n': - optm |= REG_OPTION_FIND_NOT_EMPTY; + optm |= ONIG_OPTION_FIND_NOT_EMPTY; break; case 'j': - *syntax = REG_SYNTAX_JAVA; + *syntax = ONIG_SYNTAX_JAVA; break; case 'u': - *syntax = REG_SYNTAX_GNU_REGEX; + *syntax = ONIG_SYNTAX_GNU_REGEX; break; case 'g': - *syntax = REG_SYNTAX_GREP; + *syntax = ONIG_SYNTAX_GREP; break; case 'c': - *syntax = REG_SYNTAX_EMACS; + *syntax = 
ONIG_SYNTAX_EMACS; break; case 'r': - *syntax = REG_SYNTAX_RUBY; + *syntax = ONIG_SYNTAX_RUBY; break; case 'z': - *syntax = REG_SYNTAX_PERL; + *syntax = ONIG_SYNTAX_PERL; break; case 'b': - *syntax = REG_SYNTAX_POSIX_BASIC; + *syntax = ONIG_SYNTAX_POSIX_BASIC; break; case 'd': - *syntax = REG_SYNTAX_POSIX_EXTENDED; + *syntax = ONIG_SYNTAX_POSIX_EXTENDED; break; case 'e': if (eval != NULL) *eval = 1; @@ -372,7 +472,7 @@ _php_mb_regex_init_options(const char *parg, int narg, php_mb_reg_option_type *o PHP_FUNCTION(mb_regex_encoding) { zval **arg1; - php_mb_reg_char_encoding mbctype; + OnigEncoding mbctype; if (ZEND_NUM_ARGS() == 0) { const char *retval = php_mb_regex_mbctype2name(MBSTRG(current_mbctype)); @@ -406,8 +506,9 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) char *string; int string_len; php_mb_regex_t *re; - php_mb_reg_region *regs = NULL; - int i, match_len, option, beg, end; + OnigRegion *regs = NULL; + int i, match_len, beg, end; + OnigOptionType options; char *str; array = NULL; @@ -416,9 +517,9 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) RETURN_FALSE; } - option = MBSTRG(regex_default_options); + options = MBSTRG(regex_default_options); if (icase) { - option |= RE_OPTION_IGNORECASE; + options |= ONIG_OPTION_IGNORECASE; } /* compile the regular expression from the supplied regex */ @@ -433,16 +534,16 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) arg_pattern = &tmp; /* don't bother doing an extended regex with just a number */ } - re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), option, MBSTRG(current_mbctype), MBSTRG(regex_default_syntax) TSRMLS_CC); + re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBSTRG(current_mbctype), MBSTRG(regex_default_syntax) TSRMLS_CC); if (re == NULL) { RETVAL_FALSE; goto out; } - regs = php_mb_regex_region_new(); + regs = onig_region_new(); /* 
actually execute the regular expression */ - if (php_mb_regex_search(re, (UChar *)string, (UChar *)(string + string_len), string, (UChar *)(string + string_len), regs, 0) < 0) { + if (onig_search(re, (UChar *)string, (UChar *)(string + string_len), string, (UChar *)(string + string_len), regs, 0) < 0) { RETVAL_FALSE; goto out; } @@ -471,7 +572,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) RETVAL_LONG(match_len); out: if (regs != NULL) { - php_mb_regex_region_free(regs, 1); + onig_region_free(regs, 1); } if (arg_pattern == &tmp) { zval_dtor(&tmp); @@ -496,7 +597,7 @@ PHP_FUNCTION(mb_eregi) /* }}} */ /* {{{ _php_mb_regex_ereg_replace_exec */ -static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int option) +static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options) { zval *arg_pattern_zval; @@ -511,8 +612,8 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op char *p; php_mb_regex_t *re; - php_mb_reg_syntax_type *syntax; - php_mb_reg_region *regs = NULL; + OnigSyntaxType *syntax; + OnigRegion *regs = NULL; smart_str out_buf = { 0 }; smart_str eval_buf = { 0 }; smart_str *pbuf; @@ -547,9 +648,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op } if (option_str != NULL) { - _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, &eval); + _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval); } else { - option |= MBSTRG(regex_default_options); + options |= MBSTRG(regex_default_options); syntax = MBSTRG(regex_default_syntax); } } @@ -566,7 +667,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op arg_pattern_len = 1; } /* create regex pattern buffer */ - re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBSTRG(current_mbctype), syntax TSRMLS_CC); + re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, 
options, MBSTRG(current_mbctype), syntax TSRMLS_CC); if (re == NULL) { RETURN_FALSE; } @@ -583,12 +684,12 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op err = 0; pos = string; string_lim = (UChar*)(string + string_len); - regs = php_mb_regex_region_new(); + regs = onig_region_new(); while (err >= 0) { - err = php_mb_regex_search(re, (UChar *)string, (UChar *)string_lim, pos, (UChar *)string_lim, regs, 0); + err = onig_search(re, (UChar *)string, (UChar *)string_lim, pos, (UChar *)string_lim, regs, 0); if (err <= -2) { - UChar err_str[REG_MAX_ERROR_MESSAGE_LEN]; - php_mb_regex_error_code_to_str(err_str, err); + UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(err_str, err); php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str); break; } @@ -651,14 +752,14 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op smart_str_appendl(&out_buf, pos, string_lim - pos); } } - php_mb_regex_region_free(regs, 0); + onig_region_free(regs, 0); } if (description) { efree(description); } if (regs != NULL) { - php_mb_regex_region_free(regs, 1); + onig_region_free(regs, 1); } smart_str_free(&eval_buf); @@ -684,7 +785,7 @@ PHP_FUNCTION(mb_ereg_replace) Case insensitive replace regular expression for multibyte string */ PHP_FUNCTION(mb_eregi_replace) { - _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, RE_OPTION_IGNORECASE); + _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE); } /* }}} */ @@ -695,7 +796,7 @@ PHP_FUNCTION(mb_split) char *arg_pattern; int arg_pattern_len; php_mb_regex_t *re; - php_mb_reg_region *regs = NULL; + OnigRegion *regs = NULL; char *string; UChar *pos; int string_len; @@ -720,10 +821,10 @@ PHP_FUNCTION(mb_split) pos = (UChar *)string; err = 0; - regs = php_mb_regex_region_new(); + regs = onig_region_new(); /* churn through str, generating array entries as we go */ 
while ((--count != 0) && - (err = php_mb_regex_search(re, (UChar *)string, (UChar *)(string + string_len), pos, (UChar *)(string + string_len), regs, 0)) >= 0) { + (err = onig_search(re, (UChar *)string, (UChar *)(string + string_len), pos, (UChar *)(string + string_len), regs, 0)) >= 0) { if (regs->beg[0] == regs->end[0]) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression"); break; @@ -744,15 +845,15 @@ PHP_FUNCTION(mb_split) if (count < 0) { count = 0; } - php_mb_regex_region_free(regs, 0); + onig_region_free(regs, 0); } - php_mb_regex_region_free(regs, 1); + onig_region_free(regs, 1); /* see if we encountered an error */ if (err <= -2) { - UChar err_str[REG_MAX_ERROR_MESSAGE_LEN]; - php_mb_regex_error_code_to_str(err_str, err); + UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(err_str, err); php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str); zval_dtor(return_value); RETURN_FALSE; @@ -779,7 +880,7 @@ PHP_FUNCTION(mb_ereg_match) int string_len; php_mb_regex_t *re; - php_mb_reg_syntax_type *syntax; + OnigSyntaxType *syntax; int option = 0, err; { @@ -805,7 +906,7 @@ PHP_FUNCTION(mb_ereg_match) } /* match */ - err = php_mb_regex_match(re, (UChar *)string, (UChar *)(string + string_len), (UChar *)string, NULL, 0); + err = onig_match(re, (UChar *)string, (UChar *)(string + string_len), (UChar *)string, NULL, 0); if (err >= 0) { RETVAL_TRUE; } else { @@ -822,7 +923,7 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) zval **arg_pattern, **arg_options; int n, i, err, pos, len, beg, end, option; UChar *str; - php_mb_reg_syntax_type *syntax; + OnigSyntaxType *syntax; option = MBSTRG(regex_default_options); switch (ZEND_NUM_ARGS()) { @@ -873,17 +974,17 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) } if (MBSTRG(search_regs)) { - php_mb_regex_region_free(MBSTRG(search_regs), 1); + onig_region_free(MBSTRG(search_regs), 1); } - 
MBSTRG(search_regs) = php_mb_regex_region_new(); + MBSTRG(search_regs) = onig_region_new(); - err = php_mb_regex_search(MBSTRG(search_re), str, str + len, str + pos, str + len, MBSTRG(search_regs), 0); - if (err == REG_MISMATCH) { + err = onig_search(MBSTRG(search_re), str, str + len, str + pos, str + len, MBSTRG(search_regs), 0); + if (err == ONIG_MISMATCH) { MBSTRG(search_pos) = len; RETVAL_FALSE; } else if (err <= -2) { - UChar err_str[REG_MAX_ERROR_MESSAGE_LEN]; - php_mb_regex_error_code_to_str(err_str, err); + UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(err_str, err); php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str); RETVAL_FALSE; } else { @@ -924,8 +1025,8 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) } if (err < 0) { - php_mb_regex_region_free(MBSTRG(search_regs), 1); - MBSTRG(search_regs) = (php_mb_reg_region *)NULL; + onig_region_free(MBSTRG(search_regs), 1); + MBSTRG(search_regs) = (OnigRegion *)NULL; } } /* }}} */ @@ -959,7 +1060,7 @@ PHP_FUNCTION(mb_ereg_search_regs) PHP_FUNCTION(mb_ereg_search_init) { zval **arg_str, **arg_pattern, **arg_options; - php_mb_reg_syntax_type *syntax = NULL; + OnigSyntaxType *syntax = NULL; int option; option = MBSTRG(regex_default_options); @@ -1008,8 +1109,8 @@ PHP_FUNCTION(mb_ereg_search_init) MBSTRG(search_pos) = 0; if (MBSTRG(search_regs) != NULL) { - php_mb_regex_region_free(MBSTRG(search_regs), 1); - MBSTRG(search_regs) = (php_mb_reg_region *) NULL; + onig_region_free(MBSTRG(search_regs), 1); + MBSTRG(search_regs) = (OnigRegion *) NULL; } RETURN_TRUE; @@ -1076,7 +1177,7 @@ PHP_FUNCTION(mb_ereg_search_setpos) /* }}} */ /* {{{ php_mb_regex_set_options */ -void php_mb_regex_set_options(php_mb_reg_option_type options, php_mb_reg_syntax_type *syntax, php_mb_reg_option_type *prev_options, php_mb_reg_syntax_type **prev_syntax TSRMLS_DC) +void php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, 
OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC) { if (prev_options != NULL) { *prev_options = MBSTRG(regex_default_options); @@ -1093,8 +1194,8 @@ void php_mb_regex_set_options(php_mb_reg_option_type options, php_mb_reg_syntax_ Set or get the default options for mbregex functions */ PHP_FUNCTION(mb_regex_set_options) { - php_mb_reg_option_type opt; - php_mb_reg_syntax_type *syntax; + OnigOptionType opt; + OnigSyntaxType *syntax; char *string = NULL; int string_len; char buf[16]; diff --git a/ext/mbstring/php_mbregex.h b/ext/mbstring/php_mbregex.h index 86c9bf970a..7d99953a70 100644 --- a/ext/mbstring/php_mbregex.h +++ b/ext/mbstring/php_mbregex.h @@ -29,16 +29,16 @@ /* {{{ PHP_MBREGEX_GLOBALS */ #define PHP_MBREGEX_GLOBALS \ - php_mb_reg_char_encoding default_mbctype; \ - php_mb_reg_char_encoding current_mbctype; \ + OnigEncoding default_mbctype; \ + OnigEncoding current_mbctype; \ HashTable ht_rc; \ zval *search_str; \ zval *search_str_val; \ unsigned int search_pos; \ php_mb_regex_t *search_re; \ - struct php_mb_re_registers *search_regs; \ - int regex_default_options; \ - php_mb_reg_syntax_type *regex_default_syntax; + OnigRegion *search_regs; \ + OnigOptionType regex_default_options; \ + OnigSyntaxType *regex_default_syntax; /* }}} */ /* {{{ PHP_MBREGEX_FUNCTION_ENTRIES */ @@ -83,10 +83,10 @@ PHP_MSHUTDOWN_FUNCTION(mb_regex); PHP_RINIT_FUNCTION(mb_regex); PHP_RSHUTDOWN_FUNCTION(mb_regex); void _php_mb_regex_globals_ctor(zend_mbstring_globals_ptr pglobals TSRMLS_DC); -void php_mb_regex_set_options(php_mb_reg_option_type options, php_mb_reg_syntax_type *syntax, php_mb_reg_option_type *prev_options, php_mb_reg_syntax_type **prev_syntax TSRMLS_DC); +void php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC); void _php_mb_regex_globals_dtor(zend_mbstring_globals_ptr pglobals TSRMLS_DC); -php_mb_reg_char_encoding php_mb_regex_name2mbctype(const char *pname); 
-const char *php_mb_regex_mbctype2name(php_mb_reg_char_encoding mbctype); +OnigEncoding php_mb_regex_name2mbctype(const char *pname); +const char *php_mb_regex_mbctype2name(OnigEncoding mbctype); PHP_FUNCTION(mb_regex_encoding); PHP_FUNCTION(mb_ereg);