From fe92d64a4ad700082b1e805f381183884fb7dbe1 Mon Sep 17 00:00:00 2001 From: Rui Hirokawa Date: Sat, 15 Oct 2011 08:55:53 +0000 Subject: [PATCH] updated bundled oniguruma regex library to 5.9.2. fixed bug #42290. --- ext/mbstring/oniguruma/COPYING | 6 +- ext/mbstring/oniguruma/HISTORY | 318 +- ext/mbstring/oniguruma/README | 42 +- ext/mbstring/oniguruma/README.ja | 41 +- ext/mbstring/oniguruma/doc/API | 67 +- ext/mbstring/oniguruma/doc/API.ja | 72 +- ext/mbstring/oniguruma/doc/FAQ | 4 +- ext/mbstring/oniguruma/doc/FAQ.ja | 18 +- ext/mbstring/oniguruma/doc/RE | 153 +- ext/mbstring/oniguruma/doc/RE.ja | 162 +- ext/mbstring/oniguruma/enc/ascii.c | 19 +- ext/mbstring/oniguruma/enc/big5.c | 36 +- ext/mbstring/oniguruma/enc/cp1251.c | 200 + ext/mbstring/oniguruma/enc/euc_jp.c | 199 +- ext/mbstring/oniguruma/enc/euc_kr.c | 53 +- ext/mbstring/oniguruma/enc/euc_tw.c | 39 +- ext/mbstring/oniguruma/enc/gb18030.c | 36 +- ext/mbstring/oniguruma/enc/iso8859_1.c | 269 +- ext/mbstring/oniguruma/enc/iso8859_10.c | 305 +- ext/mbstring/oniguruma/enc/iso8859_11.c | 81 +- ext/mbstring/oniguruma/enc/iso8859_13.c | 254 +- ext/mbstring/oniguruma/enc/iso8859_14.c | 297 +- ext/mbstring/oniguruma/enc/iso8859_15.c | 272 +- ext/mbstring/oniguruma/enc/iso8859_16.c | 287 +- ext/mbstring/oniguruma/enc/iso8859_2.c | 291 +- ext/mbstring/oniguruma/enc/iso8859_3.c | 282 +- ext/mbstring/oniguruma/enc/iso8859_4.c | 291 +- ext/mbstring/oniguruma/enc/iso8859_5.c | 292 +- ext/mbstring/oniguruma/enc/iso8859_6.c | 71 +- ext/mbstring/oniguruma/enc/iso8859_7.c | 264 +- ext/mbstring/oniguruma/enc/iso8859_8.c | 67 +- ext/mbstring/oniguruma/enc/iso8859_9.c | 262 +- ext/mbstring/oniguruma/enc/koi8.c | 260 +- ext/mbstring/oniguruma/enc/koi8_r.c | 240 +- ext/mbstring/oniguruma/enc/mktable.c | 127 +- ext/mbstring/oniguruma/enc/sjis.c | 188 +- ext/mbstring/oniguruma/enc/unicode.c | 10107 ++++++++++++++++++--- ext/mbstring/oniguruma/enc/utf16_be.c | 125 +- ext/mbstring/oniguruma/enc/utf16_le.c | 122 +- ext/mbstring/oniguruma/enc/utf32_be.c | 125 +- ext/mbstring/oniguruma/enc/utf32_le.c | 123 +- ext/mbstring/oniguruma/enc/utf8.c | 3563 +------- ext/mbstring/oniguruma/index.html | 18 +- ext/mbstring/oniguruma/index_ja.html | 190 + ext/mbstring/oniguruma/onigposix.h | 2 +- ext/mbstring/oniguruma/oniguruma.h | 259 +- ext/mbstring/oniguruma/regcomp.c | 2070 +++-- ext/mbstring/oniguruma/regenc.c | 690 +- ext/mbstring/oniguruma/regenc.h | 120 +- ext/mbstring/oniguruma/regerror.c | 50 +- ext/mbstring/oniguruma/regexec.c | 998 +- ext/mbstring/oniguruma/regext.c | 27 +- ext/mbstring/oniguruma/reggnu.c | 28 +- ext/mbstring/oniguruma/regint.h | 418 +- ext/mbstring/oniguruma/regparse.c | 1823 ++-- ext/mbstring/oniguruma/regparse.h | 307 +- ext/mbstring/oniguruma/regposerr.c | 14 +- ext/mbstring/oniguruma/regposix.c | 12 +- ext/mbstring/oniguruma/regsyntax.c | 87 +- ext/mbstring/oniguruma/regversion.c | 5 +- ext/mbstring/oniguruma/st.c | 21 +- ext/mbstring/oniguruma/testc.c | 863 ++ ext/mbstring/oniguruma/testu.c | 911 ++ ext/mbstring/oniguruma/win32/Makefile | 200 + ext/mbstring/oniguruma/win32/testc.c | 863 ++ ext/mbstring/tests/mb_eregi_replace.phpt | 6 +- 66 files changed, 18956 insertions(+), 11056 deletions(-) create mode 100644 ext/mbstring/oniguruma/enc/cp1251.c create mode 100644 ext/mbstring/oniguruma/index_ja.html create mode 100644 ext/mbstring/oniguruma/testc.c create mode 100644 ext/mbstring/oniguruma/testu.c create mode 100644 ext/mbstring/oniguruma/win32/Makefile create mode 100644 ext/mbstring/oniguruma/win32/testc.c diff --git a/ext/mbstring/oniguruma/COPYING b/ext/mbstring/oniguruma/COPYING index 4d321bb93b..2cee0bbec8 100644 --- a/ext/mbstring/oniguruma/COPYING +++ b/ext/mbstring/oniguruma/COPYING @@ -1,12 +1,8 @@ Oniguruma LICENSE ----------------- -When this software is partly used or it is distributed with Ruby, -this of Ruby follows the license of Ruby. -It follows the BSD license in the case of the one except for it. - /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY index a1debefa49..06f38c246a 100644 --- a/ext/mbstring/oniguruma/HISTORY +++ b/ext/mbstring/oniguruma/HISTORY @@ -1,111 +1,316 @@ History -2007/08/16: Version 4.7.1 - -2007/08/16: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux]. -2007/07/04: [spec] (thanks K.Takata) +2010/01/09: Version 5.9.2 + +2010/01/05: [bug] fix utf16be_code_to_mbc() and utf16le_code_to_mbc(). +2008/09/16: [bug] fix memory leaks in parse_exp(). +2008/08/01: [bug] fix memory leaks. +2008/06/17: [bug] invalid type of argument was used + in onig_st_lookup_strend(). +2008/06/16: [bug] invalid CaseFoldMap entry in ISO-8859-5. 0xdf -> 0xde +2008/02/19: [new] add: onig_reg_init(). +2008/02/19: [new] add: onig_free_body(). +2008/02/19: [new] add: onig_new_without_alloc(). +2008/02/19: [API] rename onig_alloc_init() to onig_reg_init(), + and argument type changed. +2008/01/31: [impl] move UTF16_IS_SURROGATE_XXX() to regenc.h. +2008/01/30: [bug] (thanks akr) + fix euctw_islead(). +2008/01/23: [bug] update enc/koi8.c. + +2007/12/22: Version 5.9.1 + +2007/12/21: [impl] add sprint_byte(). +2007/11/28: [bug] (thanks Andy Armstrong) + don't overwrite error code in fetch_name(). +2007/11/12: [bug] utf8 mbc length of code 0xfe, 0xff are not 1, +2007/10/23: [spec] onig_enc_len() takes three arguments. (not used) +2007/10/15: [impl] (thanks Rui Hirokawa) + add check HAVE_STDARG_H. +2007/09/07: [API] rename enc_len() to onig_enc_len() in oniguruma.h. +2007/09/04: [API] remove ONIGENC_ERR_XXXXX. +2007/09/03: [API] add error ONIGERR_INVALID_CODE_POINT_VALUE. +2007/09/03: [impl] change error message to "invaid code point value" + for ONIGERR_INVALID_WIDE_CHAR_VALUE. +2007/09/03: [bug] xxx_code_to_mbclen() should return + ONIGERR_INVALID_WIDE_CHAR_VALUE for invalid code point. + ex. /[\x{7fffffff}]/ for ASCII encoding. +2007/08/28: [impl] remove "warning: no previous declaration ...". +2007/08/21: [impl] remove warnings in enc/mktable.c. +2007/08/20: [impl] remove "warning: unused parameter" +2007/08/20: [impl] remove "warning: comparison between signed and unsigned". +2007/08/06: [impl] remove clear_not_flag_cclass(). +2007/08/03: [bug] fix the case of undefined USE_NAMED_GROUP. +2007/08/02: [spec] add backref by number. +2007/08/01: [API] add OnigCtype. +2007/07/27: [spec] add USE_CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS. +2007/07/24: [impl] define PLATFORM_UNALIGNED_WORD_ACCESS. +2007/07/23: [dist] fix doc/FAQ.ja. + +2007/07/14: Version 5.9.0 + +2007/07/13: [bug] add check into onig_reduce_nested_quantifier(). +2007/06/26: [spec] (thanks K.Takata) ONIG_OPTION_SINGLELINE: '$' -> '\Z' (as Perl) -2007/07/04: [dist] (thanks K.Takata) +2007/06/26: [dist] (thanks K.Takata) fix documents API and API.ja. - -2007/06/18: Version 4.7.0 - -2007/06/18: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux]. +2007/06/19: [impl] remove IS_NOT_NULL() check before onig_node_free(). 2007/06/18: [bug] (thanks KUBO Takehiro) WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint). -2007/06/05: [impl] add #ifndef vsnprintf in regint.h. -2007/06/05: [bug] should check USE_CRNL_AS_LINE_TERMINATOR case +2007/06/18: [impl] rename CClassNode flags. +2007/06/18: [bug] initialization miss. +2007/06/13: [impl] change node type reference NXXXX. +2007/06/11: [impl] add node type bit. +2007/06/11: [spec] allow anchor in enclosed repeater. /(\z)*/ +2007/06/11: [impl] rename node types. +2007/06/08: [impl] remove OP_SET_OPTION_PUSH and OP_SET_OPTION from match_at(). +2007/06/07: [impl] use xvsnprintf(). +2007/06/06: [tune] don't set qn->next_head_exact for string first byte is zero. +2007/06/06: [impl] remove unused variables. + +2007/06/04: Version 5.8.0 + +2007/06/04: [impl] add #ifndef vsnprintf into regint.h. +2007/05/31: [dist] add configure option '--enable-crnl-as-line-terminator'. +2007/05/30: [dist] add sample/crnl.c. +2007/05/30: [bug] should check USE_CRNL_AS_LINE_TERMINATOR case in onig_search(). +2007/05/29: [impl] move USE_CRNL_AS_LINE_TERMINATOR into regenc.h. +2007/05/29: [impl] should check USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + in forward_search_range() and backward_search_range(). -2007/04/12: Version 4.6.2 +2007/04/27: Version 5.7.0 +2007/04/20: [spec] add config USE_MATCH_RANGE_IS_COMPLETE_RANGE. +2007/04/20: [impl] refactoring in match_at(). + +2007/04/12: Version 5.6.1 + +2007/04/12: [bug] must not use UChar in oniguruma.h. 2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000 - to 0x4000. + to 0x4000. [ruby-core:10883] + +2007/04/04: Version 5.6.0 (mourning for Hideo Takamatsu) + +2007/04/03: [spec] add new notation (?'name'), \k'name', \g'name'. +2007/04/03: [impl] remove unused variable. 2007/03/26: [impl] add 'void' to function declarations. -2007/03/06: Version 4.6.1 +2007/03/06: Version 5.5.3 -2007/03/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. 2007/03/06: [bug] add #include for bcc32. (In bcc32, alloca() is declared in malloc.h.) -2007/03/06: [impl] remove including version.h of Ruby. 2007/03/02: [bug] invalid optimization for semi-end-buf in onig_search(). ex. /\n\Z/.match("aaaaaaaaaa\n") 2007/03/02: [impl] move range > start check position in end_buf process. -2007/02/08: Version 4.6.0 +2007/01/09: Version 5.5.2 -2007/02/08: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. -2007/01/09: [tune] select_opt_exact_info() didn't work for empty info. +2007/01/09: [impl] rename USE_EXTERNAL_LOWER_CASE_CONV_TABLE. +2007/01/05: [tune] select_opt_exact_info() didn't work for empty info. ex. /.a/ make MAP info instead of EXACT info. -2006/12/29: [impl] add print_enc_string() for ONIG_DEBUG mode. -2006/12/22: [spec] should check too short multibyte char in parse_exp(). - add USE_PAD_TO_SHORT_BYTE_CHAR. - ex. /\x00/ in UTF16 should be error. +2006/12/28: [impl] add print_enc_string() for ONIG_DEBUG mode. + +2006/12/22: Version 5.5.1 -2006/11/17: Version 4.5.1 +2006/12/22: [impl] rename ADD_PAD_TO_SHORT_BYTE_STRING + . to USE_PAD_TO_SHORT_BYTE_CHAR. +2006/12/21: [spec] should check too short multibyte char in parse_exp(). + add ADD_PAD_TO_SHORT_BYTE_STRING. + ex. /\x00/ in UTF16 should be error. -2006/11/17: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. -2006/11/15: [impl] remove CHECK_INTERRUPT. +2006/12/06: Version 5.5.0 + +2006/12/05: [bug] should add unfold-1 codes from folded code into + onigenc_unicode_get_case_fold_codes_by_str(). + (ex. "S" -> "s" -> 0x017f) +2006/12/05: [new] add flag ONIGENC_CASE_FOLD_TURKISH_AZERI and + USE_UNICODE_CASE_FOLD_TURKISH_AZERI. (disabled in default) +2006/12/04: [spec] remove ONIGENC_CASE_FOLD_FULL. +2006/11/30: [impl] remove unnecessary check in xxx_mbc_case_fold(). + +2006/11/29: Version 5.4.0 + +2006/11/28: [spec] INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is enabled in + default case fold status. +2006/11/28: [spec] rename ONIGENC_CASE_FOLD_MULTI_CHAR to + INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR. +2006/11/28: [impl] remove USE_UNICODE_CASE_FOLD_MULTI_CHAR. +2006/11/28: [impl] remove Fold[123]Table and add FoldTable. +2006/11/27: [impl] change tool/unicode_fc.rb to see CaseFolding.txt. +2006/11/24: [bug] should call callback for to[j] <-> to[k] in + onigenc_unicode_apply_all_case_fold(). + +2006/11/22: Version 5.3.0 + +2006/11/22: [dist] add index_ja.html. +2006/11/22: [impl] undef ONIG_ESCAPE_UCHAR_COLLISION in regint.h and regenc.h. +2006/11/21: [bug] invalid array access. +2006/11/21: [impl] escape UChar collision from config.h. +2006/11/20: [new] add Hiragana/Katakana properties into Shift_JIS. +2006/11/20: [impl] fix CR_Katakana[] values in EUC-JP. +2006/11/17: [impl] declare strend hash table functions in regint.h. +2006/11/17: [impl] move property list functions to regenc.c. +2006/11/17: [new] add Hiragana/Katakana properties into EUC-JP. +2006/11/15: [impl] remove NOT_RUBY from AM_CFLAGS. + +2006/11/14: Version 5.2.0 + +2006/11/14: [impl] remove program codes for Ruby. +2006/11/14: [impl] reduce program codes for Ruby. 2006/11/10: [bug] 0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e should be [:punct:]. +2006/11/09: [new] (thanks Byte) + add new character encoding CP1251. 2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER. -2006/11/07: [bug] (thanks Byte) - add 0xa3 <=> 0xb3 to CaseFoldMap[] for KOI8-R. -2006/11/06: Version 4.5.0 +2006/11/07: Version 5.1.0 -2006/11/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. -2006/11/06: [API] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND. +2006/11/07: [dist] remove test.rb, testconv.rb and testconvu.rb. +2006/11/07: [bug] get_case_fold_codes_by_str() should handle 'Ss' and 'sS' + combination for ess-tsett. +2006/11/07: [impl] apply_all_case_fold() doesn't need to return all + case character combination for multi-character folding. + (ONIGENC_CASE_FOLD_MULTI_CHAR) +2006/11/07: [bug] (thanks Byte) + add { 0xa3, 0xb3 } to CaseFoldMap[] for KOI8-R. 2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of the string range. add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE. - -2006/10/30: Version 4.4.6 - -2006/10/30: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/11/02: [impl] re-implement expand_case_fold_string() for + ONIGENC_CASE_FOLD_MULTI_CHAR. +2006/10/30: [impl] add NSTR_DONT_GET_OPTINFO flag. 2006/10/30: [impl] (thanks K.Takata) add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END. 2006/10/30: [bug] (thanks Wolfgang Nadasi-Donner) invalid offset value was used in STATE_CHECK_BUFF_INIT(). - -2006/10/24: Version 4.4.5 - -2006/10/24: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. -2006/10/24: [impl] escape -Wall warning. -2006/10/24: [tune] (thanks Kornelius Kalnbach) +2006/10/27: [tune] speed up ONIGENC_MBC_CASE_FOLD() for UTF-16, UTF-32. + (ASCII code check) +2006/10/27: [tune] (thanks Kornelius Kalnbach) String#scan for long string needs long time compare with old Ruby by initialization time for combination explosion check ex. ("test " * 100_000).scan(/\w*\s?/) change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000. reduce initialization area of state_check_buff. +2006/10/25: [impl] add DISABLE_CASE_FOLD_MULTI_CHAR(). + +2006/10/23: Version 5.0.1 + +2006/10/23: [bug] should fold string in expand_case_fold_string(). +2006/10/23: [bug] (thanks Km) + too many case fold/unfold expansion problem. + don't expand and set ambig flag to the string node. + (except ONIGENC_CASE_FOLD_MULTI_CHAR). +2006/10/23: [bug] (thanks K.Takata) + invalid \p{Alnum}, \p{ASCII}, [:alnum:], [:ascii:]. + fix OnigEncAsciiCtypeTable[] etc... +2006/10/23: [spec] (thanks K.Takata) + add [:word:] POSIX bracket. +2006/10/23: [bug] (thanks K.Takata) + \p{Word} doesn't work. +2006/10/20: [impl] don't expand for AMBIG_FLAG string in + expand_case_fold_string(). + +2006/10/19: Version 5.0.0 + +2006/10/18: [bug] ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM should be 13. +2006/10/18: [impl] remove unused functions. +2006/10/18: [dist] update documents. +2006/10/18: [API] move OnigMetaCharTableType to OnigSyntaxType. +2006/10/18: [dev] add too/unicode_fc.rb, unicode_pc.rb. +2006/10/18: [dist] remove MANIFEST-RUBY from distribution. +2006/10/18: [bug] return duplicated code in + onigenc_unicode_get_case_fold_codes_by_str(). +2006/10/18 [API] remove ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS. +2006/10/18: [dev] add tool/19. +2006/10/18: [dist] remove target 19 from Makefile.am. +2006/10/17: [dist] add enc/unicode.c to target 19 of win32/Makefile. +2006/10/17: [impl] change type for escape VC++ warning. +2006/10/17: [API] rename ONIGENC_CASE_FOLD_NONE to ONIGENC_CASE_FOLD_MIN. +2006/10/17: [dist] remove INSTALL-RUBY from distribution. +2006/10/17: [dist] update LTVERSION to "2:0:0". +2006/10/17: [impl] remove warnings for [make CFLAGS="-g -O2 -Wall"] + in the case USE_UNICODE_PROPERTIES and + USE_UNICODE_CASE_FOLD_MULTI_CHAR are undefined. +2006/10/17: [impl] remove warnings for [make CFLAGS="-g -O2 -Wall"]. +2006/10/17: [impl] re-implement onigenc_unicode_apply_all_case_fold(). + multi-char by case folded char-class is treated as + caseless-string (ambig flag on). + enable OP_EXACT1_IC and OP_EXACTN_IC. +2006/10/16: [bug] unfold expand for 1->2, 1->3 folding in + onigenc_unicode_apply_all_case_fold(). + add CaseFoldExpand_12[], CaseFoldExpand_13[]. 2006/10/16: [bug] (thanks Akinori Musha) first argument of rb_warn() should be format string. -2006/10/10: [impl] add msa.state_check_buff_size initialization +2006/10/16: [impl] add msa.state_check_buff_size initialization in onig_search(). +2006/10/16: [spec] re-implement Unicode Caseless Match codes. 2006/10/10: [bug] should call onig_st_free_table() in onig_free_shared_cclass_table(). +2006/10/10: [impl] remove OnigCompCaseFoldCodes. +2006/10/10: [impl] remove onigenc_ascii_is_mbc_ambiguous() and + onigenc_mbn_is_mbc_ambiguous(). +2006/10/10: [API] remove is_mbc_ambiguous() member from OnigEncodingType. +2006/10/10: [API] rename onig_set_default_ambig_flag() to + onig_set_default_case_fold_flag(), + onig_get_default_ambig_flag() to + onig_get_default_case_fold_flag(), + onig_get_ambig_flag() to onig_get_case_fold_flag(). +2006/10/10: [API] rename ambig_flag to case_fold_flag. +2006/10/10: [API] rename OnigAmbigType to OnigCaseFoldType. +2006/10/10: [impl] rename ONIGENC_IS_CODE_SB_WORD() to IS_CODE_SB_WORD() + and move to regint.h. 2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB. +2006/10/10: [impl] remove OP_EXACT1_IC and OP_EXACTN_IC from match_at(). +2006/10/10: [impl] should free new_str in expand_case_fold_string(). +2006/10/06: [dist] add test entrys to sample/encode.c. +2006/10/06: [impl] re-implement caseless match (case-fold). +2006/10/06: [impl] expand string node by case fold variations. + add expand_case_fold_string(). +2006/10/05: [spec] rename OnigCompAmbigCodeItem to OnigCaseFoldCodeItem. +2006/10/05: [spec] add apply_all_case_fold() and get_case_fold_codes_by_str() + to OnigEncodingType. +2006/10/05: [spec] remove ambig_flag, get_all_pair_ambig_codes() and + get_all_comp_ambig_codes() member from OnigEncodingType. +2006/10/03: [impl] rename mbc_to_normalize() to mbc_case_fold(). +2006/10/03: [spec] rename ONIGENC_AMBIGUOUS_MATCH_XXX + to ONIGENC_CASE_FOLD_XXX. + rename ONIGENC_CASE_FOLD_COMPOUND + to ONIGENC_CASE_FOLD_MULTI_CHAR. +2006/10/02: [impl] remove all ONIG_RUBY_M17N part. 2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT(). make valgrind happy. -2006/09/22: [impl] convert to ascii for parameter string in +2006/09/22: [impl] remove parse time ctype values (CTYPE_WORD etc...) +2006/09/22: [ruby] enable USE_BACKREF_AT_LEVEL for Ruby mode. +2006/09/22: [spec] (thanks Allan Odgaard) + allow upper case letter as the first character + of group name. + fetch_name() and fetch_name_with_level() +2006/09/21: [impl] convert to ascii for parameter string in onig_error_code_to_str(). add enc member into OnigErrorInfo. - -2006/09/19: Version 4.4.4 - -2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/21: [dist] update documents for Unicode Property. +2006/09/21: [new] add Unicode Properties. (enc/unicode.c) + Any, Assigned, C, Cc, L, Lm, Arabic, Greek etc... +2006/09/21: [impl] add USE_UNICODE_PROPERTIES into regenc.h. +2006/09/21: [impl] remove USE_UNICODE_FULL_RANGE_CTYPE. +2006/09/20: [impl] change ONIGENC_CTYPE_XXXX to sequencial values. + add BIT_CTYPE_XXXX bit flags to regenc.h. + update XXXX_CtypeTable[] for BIT_CTYPE_ALNUM. +2006/09/19: [memo] move from CVS to Subversion (1.3.2). 2006/09/19: [impl] (thanks KOYAMA Tetsuji) HAVE_STDARG_PROTOTYPES was not defined in Mac OS X by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc... - -2006/09/15: Version 4.4.3 - -2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. 2006/09/15: [bug] (thanks Allan Odgaard) out of range access in bm_search_notrev(). (p < s) +2006/09/13: [impl] add ONIGENC_CTYPE_ENC_EXT flag. +2006/09/13: [spec] remove 'Is' prefix check for property name + from fetch_char_property_to_ctype(). +2006/09/13: [API] add property_name_to_ctype member to OnigEncodingType. +2006/09/12: [spec][ruby] add ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY and + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT to OnigSyntaxRuby. 2006/09/08: Version 4.4.2 @@ -1808,8 +2013,17 @@ History [inst: changes for installation] [dist: distribution change] [test: test] +[dev: development] [memo: memo] -- + +svn copy file:///home/kosako/svnreps/svnrep_onig/trunk file:///home/kosako/svnreps/svnrep_onig/tags/5.0.0 -m "ADD TAG: 5.0.0" + + +svn propset svn:ignore -F .cvsignore . +svn commit -m "..." + + cvs history -T @@ -1820,7 +2034,7 @@ cvs rtag "VERSION_X_X_X" oniguruma * write Makefile.am and configure.in. > aclocal -> libtoolize +> libtoolize or glibtoolize > automake --foreign --add-missing > autoconf > configure --with-rubydir=... CFLAGS="-O2 -Wall" diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README index dff7fba562..8390afd050 100644 --- a/ext/mbstring/oniguruma/README +++ b/ext/mbstring/oniguruma/README @@ -1,9 +1,8 @@ -README 2007/06/18 +README 2007/05/31 Oniguruma ---- (C) K.Kosako http://www.geocities.jp/kosako3/oniguruma/ -http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ Oniguruma is a regular expressions library. The characteristics of this library is that different character encoding @@ -13,16 +12,20 @@ Supported character encodings: ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, EUC-JP, EUC-TW, EUC-KR, EUC-CN, - Shift_JIS, Big5, GB 18030, KOI8-R, KOI8, + Shift_JIS, Big5, GB18030, KOI8-R, CP1251, ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 -* GB 18030: contributed by KUBO Takehiro -* KOI8 is not included in library archive by default setup. - (need to edit Makefile if you want to use it.) +* GB18030: contributed by KUBO Takehiro +* CP1251: contributed by Byte ------------------------------------------------------------ +License + + BSD license. + + Install Case 1: Unix and Cygwin platform @@ -63,14 +66,6 @@ Install -License - - When this software is partly used or it is distributed with Ruby, - this of Ruby follows the license of Ruby. - It follows the BSD license in the case of the one except for it. - - - Regular Expressions See doc/RE (or doc/RE.ja for Japanese). @@ -108,7 +103,10 @@ Sample Programs sample/posix.c POSIX API sample. sample/sql.c example of the variable meta characters. (SQL-like pattern matching) + +Test Programs sample/syntax.c Perl, Java and ASIS syntax test. + sample/crnl.c --enable-crnl-as-line-terminator test Source Files @@ -145,9 +143,10 @@ Source Files enc/euc_kr.c EUC-KR, EUC-CN encoding. enc/sjis.c Shift_JIS encoding. enc/big5.c Big5 encoding. - enc/gb18030.c GB 18030 encoding (contributed by KUBO Takehiro) + enc/gb18030.c GB18030 encoding. enc/koi8.c KOI8 encoding. enc/koi8_r.c KOI8-R encoding. + enc/cp1251.c CP1251 encoding. enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1) enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2) enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3) @@ -176,14 +175,15 @@ Source Files -API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6 - - + re_compile_fastmap() is removed. - + re_alloc_pattern() is added. - +ToDo + ? case fold flag: Katakana <-> Hiragana. + ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z) + ?? \X (== \PM\pM*) + ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS. + ?? transmission stopper. (return ONIG_STOP from match_at()) -I'm thankful to Akinori MUSHA. +and I'm thankful to Akinori MUSHA. Mail Address: K.Kosako diff --git a/ext/mbstring/oniguruma/README.ja b/ext/mbstring/oniguruma/README.ja index 2dee793cae..b14822c9e6 100644 --- a/ext/mbstring/oniguruma/README.ja +++ b/ext/mbstring/oniguruma/README.ja @@ -1,9 +1,8 @@ -README.ja 2007/06/18 +README.ja 2007/05/31 µ´¼Ö ---- (C) K.Kosako http://www.geocities.jp/kosako3/oniguruma/ -http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ µ´¼Ö¤ÏÀµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤Ç¤¢¤ë¡£ ¤³¤Î¥é¥¤¥Ö¥é¥ê¤ÎÆÃĹ¤Ï¡¢¤½¤ì¤¾¤ì¤ÎÀµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤´¤È¤Ë @@ -13,16 +12,20 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, EUC-JP, EUC-TW, EUC-KR, EUC-CN, - Shift_JIS, Big5, GB 18030, KOI8-R, KOI8, + Shift_JIS, Big5, GB18030, KOI8-R, CP1251, ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 -* GB 18030: µ×ÊÝ·òÍλáÄó¶¡ -* KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£ - (ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È) +* GB18030: µ×ÊÝ·òÍλáÄó¶¡ +* CP1251: Byte»áÄó¶¡ ------------------------------------------------------------ +¥é¥¤¥»¥ó¥¹ + + BSD¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£ + + ¥¤¥ó¥¹¥È¡¼¥ë ¥±¡¼¥¹£±: Unix¤ÈCygwin´Ä¶­ @@ -63,12 +66,6 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ 5. nmake ctest -¥é¥¤¥»¥ó¥¹ - - ¤³¤Î¥½¥Õ¥È¥¦¥§¥¢¤¬Ruby¤È°ì½ï¤Ë»ÈÍѤޤ¿¤ÏÇÛÉÛ¤µ¤ì¤ë¾ì¹ç¤Ë¤Ï¡¢ - Ruby¤Î¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£ - ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¡¢BSD¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£ - Àµµ¬É½¸½ @@ -97,7 +94,7 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ GNU libtool¤ò»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ç¡¢¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤¬¶¦Í­¥é¥¤¥Ö¥é¥ê¤ò¥µ¥Ý¡¼¥È¤·¤Æ ¤¤¤ì¤Ð¡¢»ÈÍѤǤ­¤ë¤è¤¦¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£ ÀÅۥ饤¥Ö¥é¥ê¤È¶¦Í­¥é¥¤¥Ö¥é¥ê¤Î¤É¤Á¤é¤ò»ÈÍѤ¹¤ë¤«¤ò»ØÄꤹ¤ëÊýË¡¡¢¼Â¹Ô»þÅÀ¤Ç¤Î - ´Ä¶­ÀßÄêÊýË¡¤Ë¤Ä¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£ + ´Ä¶­ÀßÄêÊýË¡¤Ë¤Ä¤¤¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£ Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢ @@ -112,7 +109,10 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ sample/listcap.c Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã sample/posix.c POSIX API»ÈÍÑÎã sample/sql.c ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó) + +¥Æ¥¹¥È¥×¥í¥°¥é¥à sample/syntax.c Perl¡¢Java¡¢ASISʸˡ¤Î¥Æ¥¹¥È + sample/crnl.c --enable-crnl-as-line-terminator ¥Æ¥¹¥È ¥½¡¼¥¹¥Õ¥¡¥¤¥ë @@ -149,9 +149,10 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ enc/euc_kr.c EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/sjis.c Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/big5.c Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° - enc/gb18030.c GB 18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° (µ×ÊÝ·òÍλá Äó¶¡) + enc/gb18030.c GB18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/koi8.c KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/koi8_r.c KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/cp1251.c CP1251 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/iso8859_1.c ISO-8859-1 (Latin-1) enc/iso8859_2.c ISO-8859-2 (Latin-2) enc/iso8859_3.c ISO-8859-3 (Latin-3) @@ -180,13 +181,15 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ -Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤ - - + re_compile_fastmap() ¤Ïºï½ü¤µ¤ì¤¿¡£ - + re_alloc_pattern() ¤¬Äɲ䵤줿¡£ +»Ä·ï + ? case fold flag: Katakana <-> Hiragana + ? ONIG_OPTION_NOTBOS/NOTEOSÄɲà (\A, \z, \Z) + ?? \X (== \PM\pM*) + ?? ʸˡÍ×ÁÇ ONIG_SYN_CONTEXT_INDEP_ANCHORS¤Î¼ÂÁõ + ?? ¸¡º÷°ÌÃÖ°ÜÆ°Ää»ß±é»»»Ò (match_at()¤«¤éONIG_STOP¤òÊÖ¤¹) -I'm thankful to Akinori MUSHA. +and I'm thankful to Akinori MUSHA. ¥¢¥É¥ì¥¹: K.Kosako diff --git a/ext/mbstring/oniguruma/doc/API b/ext/mbstring/oniguruma/doc/API index 2f66287d49..f3b88756bc 100644 --- a/ext/mbstring/oniguruma/doc/API +++ b/ext/mbstring/oniguruma/doc/API @@ -1,4 +1,4 @@ -Oniguruma API Version 4.7.1 2007/07/04 +Oniguruma API Version 5.9.2 2008/02/19 #include @@ -105,10 +105,10 @@ Oniguruma API Version 4.7.1 2007/07/04 ONIG_ENCODING_EUC_KR EUC-KR ONIG_ENCODING_EUC_CN EUC-CN ONIG_ENCODING_SJIS Shift_JIS - ONIG_ENCODING_KOI8 KOI8 ONIG_ENCODING_KOI8_R KOI8-R + ONIG_ENCODING_CP1251 CP1251 ONIG_ENCODING_BIG5 Big5 - ONIG_ENCODING_GB18030 GB 18030 + ONIG_ENCODING_GB18030 GB18030 or any OnigEncodingType data address defined by user. @@ -134,6 +134,18 @@ Oniguruma API Version 4.7.1 2007/07/04 +# int onig_new_without_alloc(regex_t* reg, const UChar* pattern, + const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + Create a regex object. + reg object area is not allocated in this function. + + normal return: ONIG_NORMAL + + + # int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo) @@ -153,15 +165,12 @@ Oniguruma API Version 4.7.1 2007/07/04 ci->target_enc: target string character encoding. ci->syntax: address of pattern syntax definition. ci->option: compile time option. - ci->ambig_flag: character matching ambiguity bit flag for + ci->case_fold_flag: character matching case fold bit flag for ONIG_OPTION_IGNORECASE mode. - ONIGENC_AMBIGUOUS_MATCH_NONE: exact - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ignore case for ASCII - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ignore case for non-ASCII - ONIGENC_AMBIGUOUS_MATCH_FULL: all ambiguity on - ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII) - onig_set_default_ambig_flag() + ONIGENC_CASE_FOLD_MIN: minimum + ONIGENC_CASE_FOLD_DEFAULT: minimum + onig_set_default_case_fold_flag() 5 err_info: address for return optional error info. Use this value as 3rd argument of onig_error_code_to_str(). @@ -188,6 +197,14 @@ Oniguruma API Version 4.7.1 2007/07/04 1 reg: regex object. +# void onig_free_body(regex_t* reg) + + Free memory used by regex object. (Except reg oneself.) + + arguments + 1 reg: regex object. + + # int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) @@ -202,8 +219,8 @@ Oniguruma API Version 4.7.1 2007/07/04 3 end: terminate address of target string 4 start: search start address of target string 5 range: search terminate address of target string - in forward search (start <= searched string head < range) - in backward search (range <= searched string head <= start) + in forward search (start <= searched string < range) + in backward search (range <= searched string <= start) 6 region: address for return group match range info (NULL is allowed) 7 option: search time option @@ -335,10 +352,10 @@ Oniguruma API Version 4.7.1 2007/07/04 1 reg: regex object. -# OnigEncoding onig_get_encoding(regex_t* reg) -# OnigOptionType onig_get_options(regex_t* reg) -# OnigAmbigType onig_get_ambig_flag(regex_t* reg) -# OnigSyntaxType* onig_get_syntax(regex_t* reg) +# OnigEncoding onig_get_encoding(regex_t* reg) +# OnigOptionType onig_get_options(regex_t* reg) +# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg) +# OnigSyntaxType* onig_get_syntax(regex_t* reg) Return a value of the regex object. @@ -518,7 +535,7 @@ Oniguruma API Version 4.7.1 2007/07/04 2 from: source address. -# int onig_set_meta_char(OnigEncoding enc, unsigned int what, +# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code) Set a variable meta character to the code point value. @@ -529,8 +546,8 @@ Oniguruma API Version 4.7.1 2007/07/04 normal return: ONIG_NORMAL arguments - 1 enc: target encoding - 2 what: specifies which meta character it is. + 1 syntax: target syntax + 2 what: specifies which meta character it is. ONIG_META_CHAR_ESCAPE ONIG_META_CHAR_ANYCHAR @@ -542,16 +559,16 @@ Oniguruma API Version 4.7.1 2007/07/04 3 code: meta character or ONIG_INEFFECTIVE_META_CHAR. -# OnigAmbigType onig_get_default_ambig_flag() +# OnigCaseFoldType onig_get_default_case_fold_flag() - Get default ambig flag. + Get default case fold flag. -# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag) +# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) - Set default ambig flag. + Set default case fold flag. - 1 ambig_flag: ambiguity flag + 1 case_fold_flag: case fold flag # unsigned int onig_get_match_stack_limit_size(void) @@ -580,6 +597,6 @@ Oniguruma API Version 4.7.1 2007/07/04 # const char* onig_version(void) - Return version string. (ex. "2.2.8") + Return version string. (ex. "5.0.3") // END diff --git a/ext/mbstring/oniguruma/doc/API.ja b/ext/mbstring/oniguruma/doc/API.ja index f2a8bd6f10..f681fa5460 100644 --- a/ext/mbstring/oniguruma/doc/API.ja +++ b/ext/mbstring/oniguruma/doc/API.ja @@ -1,4 +1,4 @@ -µ´¼Ö¥¤¥ó¥¿¡¼¥Õ¥§¡¼¥¹ Version 4.7.1 2007/07/04 +µ´¼Ö¥¤¥ó¥¿¡¼¥Õ¥§¡¼¥¹ Version 5.9.2 2008/02/19 #include @@ -106,10 +106,10 @@ ONIG_ENCODING_EUC_KR EUC-KR ONIG_ENCODING_EUC_CN EUC-CN ONIG_ENCODING_SJIS Shift_JIS - ONIG_ENCODING_KOI8 KOI8 ONIG_ENCODING_KOI8_R KOI8-R + ONIG_ENCODING_CP1251 CP1251 ONIG_ENCODING_BIG5 Big5 - ONIG_ENCODING_GB18030 GB 18030 + ONIG_ENCODING_GB18030 GB18030 ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigEncodingType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹ @@ -134,6 +134,19 @@ onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë + +# int onig_new_without_alloc(regex_t* reg, const UChar* pattern, + const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£ + reg¤ÎÎΰè¤òÆâÉô¤Ç³ä¤êÅö¤Æ¤Ê¤¤¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + + # int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo) @@ -153,15 +166,12 @@ ci->target_enc: ÂоÝʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° ci->syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ ci->option: Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó - ci->ambig_flag: ONIG_OPTION_IGNORECASE¥â¡¼¥É¤Ç¤Î + ci->case_fold_flag: ONIG_OPTION_IGNORECASE¥â¡¼¥É¤Ç¤Î ʸ»úÛ£Ëæ¥Þ¥Ã¥Á»ØÄê¥Ó¥Ã¥È¥Õ¥é¥° - ONIGENC_AMBIGUOUS_MATCH_NONE: Û£ËæÌµ¤· - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ASCII¤ÎÂçʸ»ú¾®Ê¸»ú - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ASCII°Ê³°¤ÎÂçʸ»ú¾®Ê¸»ú - ONIGENC_AMBIGUOUS_MATCH_FULL: Á´¤Æ¤ÎÛ£Ëæ¥Õ¥é¥°Í­¸ú - ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII) - onig_set_default_ambig_flag() + ONIGENC_CASE_FOLD_MIN: ºÇ¾® + ONIGENC_CASE_FOLD_DEFAULT: ºÇ¾® + onig_set_default_case_fold_flag() 5 err_info: ¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹ onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë @@ -187,6 +197,14 @@ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È +# void onig_free_body(regex_t* reg) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤Î¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£(reg¼«¿È¤ÎÎΰè¤ò½ü¤¤¤Æ) + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + # int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) @@ -200,10 +218,10 @@ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È 2 str: ¸¡º÷ÂоÝʸ»úÎó 3 end: ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹ - 4 start: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ³«»Ï¥¢¥É¥ì¥¹ - 5 range: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ½ªÃ¼¥¢¥É¥ì¥¹ - Á°Êýõº÷ (start <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ < range) - ¸åÊýõº÷ (range <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ <= start) + 4 start: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ¥¢¥É¥ì¥¹ + 5 range: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷½ªÎ»°ÌÃÖ¥¢¥É¥ì¥¹ + Á°Êýõº÷ (start <= õº÷¤µ¤ì¤ëʸ»úÎó < range) + ¸åÊýõº÷ (range <= õº÷¤µ¤ì¤ëʸ»úÎó <= start) 6 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region) (NULL¤âµö¤µ¤ì¤ë) 7 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó @@ -340,10 +358,10 @@ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È -# OnigEncoding onig_get_encoding(regex_t* reg) -# OnigOptionType onig_get_options(regex_t* reg) -# OnigAmbigType onig_get_ambig_flag(regex_t* reg) -# OnigSyntaxType* onig_get_syntax(regex_t* reg) +# OnigEncoding onig_get_encoding(regex_t* reg) +# OnigOptionType onig_get_options(regex_t* reg) +# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg) +# OnigSyntaxType* onig_get_syntax(regex_t* reg) Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤ËÂФ·¤Æ¡¢Âбþ¤¹¤ëÃͤòÊÖ¤¹¡£ @@ -524,7 +542,7 @@ 2 from: ¸µ -# int onig_set_meta_char(OnigEncoding enc, unsigned int what, +# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code) ¥á¥¿Ê¸»ú¤ò»ØÄꤷ¤¿¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃͤ˥»¥Ã¥È¤¹¤ë¡£ @@ -535,8 +553,8 @@ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL °ú¿ô - 1 enc: ÂоÝʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° - 2 what: ¥á¥¿Ê¸»úµ¡Ç½¤Î»ØÄê + 1 syntax: ÂоÝʸˡ + 2 what: ¥á¥¿Ê¸»úµ¡Ç½¤Î»ØÄê ONIG_META_CHAR_ESCAPE ONIG_META_CHAR_ANYCHAR @@ -548,17 +566,17 @@ 3 code: ¥á¥¿Ê¸»ú¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È ¤Þ¤¿¤Ï ONIG_INEFFECTIVE_META_CHAR. -# OnigAmbigType onig_get_default_ambig_flag() +# OnigCaseFoldType onig_get_default_case_fold_flag() - ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¼èÆÀ¤¹¤ë¡£ + ¥Ç¥Õ¥©¥ë¥È¤Îcase fold¥Õ¥é¥°¤ò¼èÆÀ¤¹¤ë¡£ -# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag) +# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) - ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¥»¥Ã¥È¤¹¤ë¡£ + ¥Ç¥Õ¥©¥ë¥È¤Îcase fold¥Õ¥é¥°¤ò¥»¥Ã¥È¤¹¤ë¡£ °ú¿ô - 1 ambig_flag: Û£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥° + 1 case_fold_flag: case fold¥Õ¥é¥° # unsigned int onig_get_match_stack_limit_size(void) @@ -587,6 +605,6 @@ # const char* onig_version(void) - ¥Ð¡¼¥¸¥ç¥óʸ»úÎó¤òÊÖ¤¹¡£(Îã "2.2.8") + ¥Ð¡¼¥¸¥ç¥óʸ»úÎó¤òÊÖ¤¹¡£(Îã "5.0.3") // END diff --git a/ext/mbstring/oniguruma/doc/FAQ b/ext/mbstring/oniguruma/doc/FAQ index dccf242c8d..46a3e0e08b 100644 --- a/ext/mbstring/oniguruma/doc/FAQ +++ b/ext/mbstring/oniguruma/doc/FAQ @@ -1,4 +1,4 @@ -FAQ 2006/10/30 +FAQ 2006/11/14 1. Lognest match @@ -12,7 +12,7 @@ FAQ 2006/10/30 (A) Oniguruma Layer - Define the macro below at NOT_RUBY case in oniguruma/regint.h. + Define the macro below in oniguruma/regint.h. USE_MULTI_THREAD_SYSTEM THREAD_ATOMIC_START diff --git a/ext/mbstring/oniguruma/doc/FAQ.ja b/ext/mbstring/oniguruma/doc/FAQ.ja index 5582765ee6..1d65f9fb62 100644 --- a/ext/mbstring/oniguruma/doc/FAQ.ja +++ b/ext/mbstring/oniguruma/doc/FAQ.ja @@ -1,4 +1,4 @@ -FAQ 2006/10/30 +FAQ 2007/07/23 1. ºÇĹ¥Þ¥Ã¥Á @@ -13,7 +13,7 @@ FAQ 2006/10/30 (A) Oniguruma Layer - oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤ÎÉôʬ¤Î°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤¹¤ë¡£ + oniguruma/regint.h¤ÎÃæ¤Î°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤¹¤ë¡£ USE_MULTI_THREAD_SYSTEM THREAD_ATOMIC_START @@ -35,7 +35,16 @@ FAQ 2006/10/30 "¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊä­"¤Ë½ñ¤¤¤Æ¤ª¤¤¤¿¡£ -3. ¥á¡¼¥ê¥ó¥°¥ê¥¹¥È +3. CR + LF + + DOS¤Î²þ¹Ô(CR(0x0c) + LF(0x0a)¤ÎϢ³) + + regenc.h¤ÎÃæ¤Î¡¢°Ê²¼¤ÎÉôʬ¤òÍ­¸ú¤Ë¤¹¤ë¡£ + + /* #define USE_CRNL_AS_LINE_TERMINATOR */ + + +4. ¥á¡¼¥ê¥ó¥°¥ê¥¹¥È µ´¼Ö¤Ë´Ø¤¹¤ë¥á¡¼¥ê¥ó¥°¥ê¥¹¥È¤Ï¸ºß¤·¤Ê¤¤¡£ @@ -59,8 +68,7 @@ Oniguruma (A) Oniguruma¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç -oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤Ç°Ï¤Þ¤ì¤Æ¤¤¤ëÉôʬ¤ÎÃæ¤Ç -°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤ÆºÆ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£ +oniguruma/regint.h¤ÎÃæ¤Ç°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤ÆºÆ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£ USE_MULTI_THREAD_SYSTEM diff --git a/ext/mbstring/oniguruma/doc/RE b/ext/mbstring/oniguruma/doc/RE index 5a2783d167..21efe531a4 100644 --- a/ext/mbstring/oniguruma/doc/RE +++ b/ext/mbstring/oniguruma/doc/RE @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 4.3.0 2006/08/17 +Oniguruma Regular Expressions Version 5.9.1 2007/09/05 syntax: ONIG_SYNTAX_RUBY (default) @@ -70,6 +70,38 @@ syntax: ONIG_SYNTAX_RUBY (default) \H non hexadecimal digit char + Character Property + + * \p{property-name} + * \p{^property-name} (negative) + * \P{property-name} (negative) + + property-name: + + + works on all encodings + Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, + Print, Punct, Space, Upper, XDigit, Word, ASCII, + + + works on EUC_JP, Shift_JIS + Hiragana, Katakana + + + works on UTF8, UTF16, UTF32 + Any, Assigned, C, Cc, Cf, Cn, Co, Cs, L, Ll, Lm, Lo, Lt, Lu, + M, Mc, Me, Mn, N, Nd, Nl, No, P, Pc, Pd, Pe, Pf, Pi, Po, Ps, + S, Sc, Sk, Sm, So, Z, Zl, Zp, Zs, + Arabic, Armenian, Bengali, Bopomofo, Braille, Buginese, + Buhid, Canadian_Aboriginal, Cherokee, Common, Coptic, + Cypriot, Cyrillic, Deseret, Devanagari, Ethiopic, Georgian, + Glagolitic, Gothic, Greek, Gujarati, Gurmukhi, Han, Hangul, + Hanunoo, Hebrew, Hiragana, Inherited, Kannada, Katakana, + Kharoshthi, Khmer, Lao, Latin, Limbu, Linear_B, Malayalam, + Mongolian, Myanmar, New_Tai_Lue, Ogham, Old_Italic, Old_Persian, + Oriya, Osmanya, Runic, Shavian, Sinhala, Syloti_Nagri, Syriac, + Tagalog, Tagbanwa, Tai_Le, Tamil, Telugu, Thaana, Thai, Tibetan, + Tifinagh, Ugaritic, Yi + + + 4. Quantifier greedy @@ -111,11 +143,7 @@ syntax: ONIG_SYNTAX_RUBY (default) \A beginning of string \Z end of string, or before newline at the end \z end of string - \G matching start position (*) - - * Ruby Regexp: - previous end-of-match position - (This specification is not related to this library.) + \G matching start position 6. Character class @@ -135,40 +163,43 @@ syntax: ONIG_SYNTAX_RUBY (default) Not Unicode Case: - alnum alphabet or digit char - alpha alphabet - ascii code value: [0 - 127] - blank \t, \x20 - cntrl - digit 0-9 - graph include all of multibyte encoded characters - lower - print include all of multibyte encoded characters - punct - space \t, \n, \v, \f, \r, \x20 - upper - xdigit 0-9, a-f, A-F + alnum alphabet or digit char + alpha alphabet + ascii code value: [0 - 127] + blank \t, \x20 + cntrl + digit 0-9 + graph include all of multibyte encoded characters + lower + print include all of multibyte encoded characters + punct + space \t, \n, \v, \f, \r, \x20 + upper + xdigit 0-9, a-f, A-F + word alphanumeric, "_" and multibyte characters Unicode Case: - alnum Letter | Mark | Decimal_Number - alpha Letter | Mark - ascii 0000 - 007F - blank Space_Separator | 0009 - cntrl Control | Format | Unassigned | Private_Use | Surrogate - digit Decimal_Number - graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate - lower Lowercase_Letter - print [[:graph:]] | [[:space:]] - punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation | - Final_Punctuation | Initial_Punctuation | Other_Punctuation | - Open_Punctuation - space Space_Separator | Line_Separator | Paragraph_Separator | - 0009 | 000A | 000B | 000C | 000D | 0085 - upper Uppercase_Letter - xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066 - (0-9, a-f, A-F) + alnum Letter | Mark | Decimal_Number + alpha Letter | Mark + ascii 0000 - 007F + blank Space_Separator | 0009 + cntrl Control | Format | Unassigned | Private_Use | Surrogate + digit Decimal_Number + graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate + lower Lowercase_Letter + print [[:graph:]] | [[:space:]] + punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation | + Final_Punctuation | Initial_Punctuation | Other_Punctuation | + Open_Punctuation + space Space_Separator | Line_Separator | Paragraph_Separator | + 0009 | 000A | 000B | 000C | 000D | 0085 + upper Uppercase_Letter + xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066 + (0-9, a-f, A-F) + word Letter | Mark | Decimal_Number | Connector_Punctuation + 7. Extended groups @@ -200,9 +231,9 @@ syntax: ONIG_SYNTAX_RUBY (default) (?>subexp) atomic group don't backtrack in subexp. - (?subexp) define named group - (All characters of the name must be a word character. - And first character must not be a digit or uppper case) + (?subexp), (?'name'subexp) + define named group + (All characters of the name must be a word character.) Not only a name but a number is assigned like a captured group. @@ -215,7 +246,12 @@ syntax: ONIG_SYNTAX_RUBY (default) 8. Back reference \n back reference by group number (n >= 1) + \k back reference by group number (n >= 1) + \k'n' back reference by group number (n >= 1) + \k<-n> back reference by relative group number (n >= 1) + \k'-n' back reference by relative group number (n >= 1) \k back reference by group name + \k'name' back reference by group name In the back reference by the multiplex definition name, a subexp with a large number is referred to preferentially. @@ -227,10 +263,17 @@ syntax: ONIG_SYNTAX_RUBY (default) back reference with nest level - (This function is disabled in Ruby 1.9.) + level: 0, 1, 2, ... - \k n: 0, 1, 2, ... - \k n: 0, 1, 2, ... + \k (n >= 1) + \k (n >= 1) + \k'n+level' (n >= 1) + \k'n-level' (n >= 1) + + \k + \k + \k'name+level' + \k'name-level' Destinate relative nest level from back reference position. @@ -256,7 +299,11 @@ syntax: ONIG_SYNTAX_RUBY (default) 9. Subexp call ("Tanaka Akira special") \g call by group name + \g'name' call by group name \g call by group number (n >= 1) + \g'n' call by group number (n >= 1) + \g<-n> call by relative group number (n >= 1) + \g'-n' call by relative group number (n >= 1) * left-most recursive call is not allowed. ex. (?a|\gb) => error @@ -300,7 +347,6 @@ syntax: ONIG_SYNTAX_RUBY (default) ('g' and 'G' options are argued in ruby-dev ML) - These options are not implemented in Ruby level. ----------------------------- @@ -317,14 +363,13 @@ A-1. Syntax depend options A-2. Original extensions + hexadecimal digit char type \h, \H - + named group (?...) + + named group (?...), (?'name'...) + named backref \k + subexp call \g, \g A-3. Lacked features compare with perl 5.8.0 - + [:word:] + \N{name} + \l,\u,\L,\U, \X, \C + (?{code}) @@ -334,20 +379,10 @@ A-3. Lacked features compare with perl 5.8.0 * \Q...\E This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA. - * \p{property}, \P{property} - This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA. - Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, - Print, Punct, Space, Upper, XDigit, ASCII are supported. - - Prefix 'Is' of property name is allowed in ONIG_SYNTAX_PERL only. - ex. \p{IsXDigit}. - - Negation operator of property is supported in ONIG_SYNTAX_PERL only. - \p{^...}, \P{^...} +A-4. Differences with Japanized GNU regex(version 0.12) of Ruby 1.8 -A-4. Differences with Japanized GNU regex(version 0.12) of Ruby - + + add character property (\p{property}, \P{property}) + add hexadecimal digit char type (\h, \H) + add look-behind (?<=fixed-char-length-pattern), (?¼°) ¸¶»ÒŪ¼°½¸¹ç ¼°Á´ÂΤòÄ̲ᤷ¤¿¤È¤­¡¢¼°¤ÎÃæ¤Ç¤Î¸åÂàºÆ»î¹Ô¤ò¹Ô¤Ê¤ï¤Ê¤¤ - (?¼°) ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç + (?¼°), (?'name'¼°) + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç ¼°½¸¹ç¤Ë̾Á°¤ò³ä¤êÅö¤Æ¤ë(ÄêµÁ¤¹¤ë)¡£ - (̾Á°¤Ïñ¸ì¹½À®Ê¸»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ºÇ½é¤Îʸ»ú¤Ï - ±ÑÂçʸ»ú¤Ç¤¢¤Ã¤Æ¤Ï¤¤¤±¤Ê¤¤¡£) + (̾Á°¤Ïñ¸ì¹½À®Ê¸»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£) ̾Á°¤À¤±¤Ç¤Ê¤¯¡¢Êá³Í¼°½¸¹ç¤ÈƱÍͤËÈÖ¹æ¤â³ä¤êÅö¤Æ¤é¤ì¤ë¡£ ÈÖ¹æ»ØÄ꤬¶Ø»ß¤µ¤ì¤Æ¤¤¤Ê¤¤¾õÂÖ (10. Êá³Í¼°½¸¹ç ¤ò»²¾È) @@ -215,8 +244,13 @@ 8. ¸åÊý»²¾È - \n ÈÖ¹æ»ØÄ껲¾È (n >= 1) + \n ÈÖ¹æ»ØÄ껲¾È (n >= 1) + \k ÈÖ¹æ»ØÄ껲¾È (n >= 1) + \k'n' ÈÖ¹æ»ØÄ껲¾È (n >= 1) + \k<-n> ÁêÂÐÈÖ¹æ»ØÄ껲¾È (n >= 1) + \k'-n' ÁêÂÐÈÖ¹æ»ØÄ껲¾È (n >= 1) \k ̾Á°»ØÄ껲¾È + \k'name' ̾Á°»ØÄ껲¾È ̾Á°»ØÄ껲¾È¤Ç¡¢¤½¤Î̾Á°¤¬Ê£¿ô¤Î¼°½¸¹ç¤Ç¿½ÅÄêµÁ¤µ¤ì¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢ ÈÖ¹æ¤ÎÂ礭¤¤¼°½¸¹ç¤«¤éÍ¥ÀèŪ¤Ë»²¾È¤µ¤ì¤ë¡£ @@ -229,10 +263,17 @@ ¥Í¥¹¥È¥ì¥Ù¥ëÉÕ¤­¸åÊý»²¾È - ¤³¤Îµ¡Ç½¤Ï¸½ºß¡¢Ruby 1.9¤Ç¤Ï̵¸ú¤Ë¤·¤Æ¤¤¤ë¡£ + level: 0, 1, 2, ... + + \k (n >= 1) + \k (n >= 1) + \k'n+level' (n >= 1) + \k'n-level' (n >= 1) - \k n: 0, 1, 2, ... - \k n: 0, 1, 2, ... + \k + \k + \k'name+level' + \k'name-level' ¸åÊý»²¾È¤Î°ÌÃÖ¤«¤éÁêÂÐŪ¤ÊÉôʬ¼°¸Æ½Ð¤·¥Í¥¹¥È¥ì¥Ù¥ë¤ò»ØÄꤷ¤Æ¡¢¤½¤Î¥ì¥Ù¥ë¤Ç¤Î Êá³ÍÃͤò»²¾È¤¹¤ë¡£ @@ -259,7 +300,11 @@ 9. Éôʬ¼°¸Æ½Ð¤· ("ÅÄÃæÅ¯¥¹¥Ú¥·¥ã¥ë") \g ̾Á°»ØÄê¸Æ½Ð¤· - \g ÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1) + \g'name' ̾Á°»ØÄê¸Æ½Ð¤· + \g ÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1) + \g'n' ÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1) + \g<-n> ÁêÂÐÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1) + \g'-n' ÁêÂÐÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1) ¢¨ ºÇº¸°ÌÃ֤ǤκƵ¢¸Æ½Ð¤·¤Ï¶Ø»ß¤µ¤ì¤ë¡£ Îã. (?a|\gb) => error @@ -306,7 +351,6 @@ ¤³¤ì¤é¤Î¿¶Éñ¤Î°ÕÌ£¤Ï¡¢ ̾Á°ÉÕ¤­Êá³Í¤È̾Á°Ìµ¤·Êá³Í¤òƱ»þ¤Ë»ÈÍѤ¹¤ëɬÁ³À­¤Î¤¢¤ë¾ìÌ̤Ͼ¯¤Ê¤¤¤Ç¤¢¤í¤¦ ¤È¤¤¤¦Íýͳ¤«¤é¹Í¤¨¤é¤ì¤¿¤â¤Î¤Ç¤¢¤ë¡£ - ¤³¤ì¤é¤Î¥ª¥×¥·¥ç¥ó¤Ë¤Ä¤¤¤Æ¤Ï¡¢Ruby¤Ç¤Ï¸½ºß¼ÂÁõ¤µ¤ì¤Æ¤¤¤Ê¤¤¡£ ----------------------------- @@ -323,14 +367,13 @@ Êäµ­ 2. ÆÈ¼«³ÈÄ¥µ¡Ç½ + 16¿Ê¿ô¿ô»ú¡¢Èó16¿Ê¿ô»ú \h, \H - + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç (?...) + + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç (?...), (?'name'...) + ̾Á°»ØÄê¸åÊý»²¾È \k + Éôʬ¼°¸Æ½Ð¤· \g, \g Êäµ­ 3. Perl 5.8.0¤ÈÈæ³Ó¤·¤ÆÂ¸ºß¤·¤Ê¤¤µ¡Ç½ - + [:word:] + \N{name} + \l,\u,\L,\U, \X, \C + (?{code}) @@ -340,21 +383,10 @@ * \Q...\E ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú - * \p{property}, \P{property} - ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú - Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, - Print, Punct, Space, Upper, XDigit, ASCII¤¬»ØÄê¤Ç¤­¤ë¡£ - - ÆÃÀ­Ì¾¤ÎÁ°¤Ë 'Is'Á°ÃÖ»ì¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ß - µö¤µ¤ì¤Æ¤¤¤ë¡£ - ex. \p{IsXDigit}. - ÆÃÀ­¤ÎÈÝÄê±é»»»Ò¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ßµö¤µ¤ì¤Æ¤¤¤ë¡£ - \p{^...}, \P{^...} - - -Êäµ­ 4. Ruby¤ÎÆüËܸ첽 GNU regex(version 0.12)¤È¤Î°ã¤¤ +Êäµ­ 4. Ruby 1.8 ¤ÎÆüËܸ첽 GNU regex(version 0.12)¤È¤Î°ã¤¤ + + ʸ»úPropertyµ¡Ç½Äɲà (\p{property}, \P{Property}) + 16¿Ê¿ô»ú¥¿¥¤¥×Äɲà (\h, \H) + Ìá¤êÆÉ¤ßµ¡Ç½¤òÄɲà + ¶¯Íߤʷ«¤êÊÖ¤·»ØÄê»Ò¤òÄɲà (?+, *+, ++) @@ -411,14 +443,18 @@ Êäµ­ 6. ÌäÂêÅÀ - + UTF-8¤Ç¡¢¥Ð¥¤¥ÈÃͤ¬Å¬Àµ¤Ê²Á¤«¤É¤¦¤«¤Î¥Á¥§¥Ã¥¯¤Ï¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤¡£ + + ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¥Ð¥¤¥ÈÃͤ¬Å¬Àµ¤Ê²Á¤«¤É¤¦¤«¤Î¥Á¥§¥Ã¥¯¤Ï¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤¡£ + + Îã: UTF-8 * ÀèÆ¬¥Ð¥¤¥È¤È¤·¤ÆÉÔÀµ¤Ê¥Ð¥¤¥È¤ò°ìʸ»ú¤È¤ß¤Ê¤¹ /./u =~ "\xa3" * ÉÔ´°Á´¤Ê¥Ð¥¤¥È¥·¡¼¥±¥ó¥¹¤Î¥Á¥§¥Ã¥¯¤ò¤·¤Ê¤¤ - /\w+/ =~ "a\xf3\x8ec" + /\w+/u =~ "a\xf3\x8ec" ¤³¤ì¤òÄ´¤Ù¤ë¤³¤È¤Ï²Äǽ¤Ç¤Ï¤¢¤ë¤¬¡¢ÃÙ¤¯¤Ê¤ë¤Î¤Ç¹Ô¤Ê¤ï¤Ê¤¤¡£ + ʸ»úÎó¤È¤·¤Æ¡¢¤½¤Î¤è¤¦¤Ê¥Ð¥¤¥ÈÎó¤ò»ØÄꤷ¤¿¾ì¹ç¤Îưºî¤ÏÊݾڤ·¤Ê¤¤¡£ + ½ª¤ê diff --git a/ext/mbstring/oniguruma/enc/ascii.c b/ext/mbstring/oniguruma/enc/ascii.c index 64be21d7ff..c2715f4e0d 100644 --- a/ext/mbstring/oniguruma/enc/ascii.c +++ b/ext/mbstring/oniguruma/enc/ascii.c @@ -2,7 +2,7 @@ ascii.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,23 +43,14 @@ OnigEncodingType OnigEncodingASCII = { "US-ASCII", /* name */ 1, /* max byte length */ 1, /* min byte length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_to_normalize, - onigenc_ascii_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, ascii_is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c index 86792666a4..ca1e01b463 100644 --- a/ext/mbstring/oniguruma/enc/big5.c +++ b/ext/mbstring/oniguruma/enc/big5.c @@ -2,7 +2,7 @@ big5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -67,18 +67,21 @@ big5_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) +big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower) { - return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag, - pp, end, lower); + return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_BIG5, flag, + pp, end, lower); } +#if 0 static int -big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +big5_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) { return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end); } +#endif static int big5_is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -125,14 +128,14 @@ big5_left_adjust_char_head(const UChar* start, const UChar* s) } } } - len = enc_len(ONIG_ENCODING_BIG5, p); + len = enclen(ONIG_ENCODING_BIG5, p); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); } static int -big5_is_allowed_reverse_match(const UChar* s, const UChar* end) +big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) { const UChar c = *s; @@ -144,23 +147,14 @@ OnigEncodingType OnigEncodingBIG5 = { "Big5", /* name */ 2, /* max enc length */ 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, big5_mbc_to_code, onigenc_mb2_code_to_mbclen, big5_code_to_mbc, - big5_mbc_to_normalize, - big5_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, + big5_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, big5_is_code_ctype, onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/cp1251.c b/ext/mbstring/oniguruma/enc/cp1251.c new file mode 100644 index 0000000000..63e58d2cd8 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/cp1251.c @@ -0,0 +1,200 @@ +/********************************************************************** + cp1251.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2006-2007 Byte + * K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_CP1251_TO_LOWER_CASE(c) EncCP1251_ToLowerCaseTable[c] +#define ENC_IS_CP1251_CTYPE(code,ctype) \ + ((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncCP1251_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247', + '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277', + '\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncCP1251_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, + 0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, + 0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, + 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2, + 0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0, + 0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_CP1251_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +static int +cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_CP1251_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xb8, 0xa8 }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde }, + { 0xff, 0xdf } +}; + +static int +cp1251_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingCP1251 = { + onigenc_single_byte_mbc_enc_len, + "CP1251", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + cp1251_mbc_case_fold, + cp1251_apply_all_case_fold, + cp1251_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + cp1251_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c index 71c81ee9fe..f605297cc3 100644 --- a/ext/mbstring/oniguruma/enc/euc_jp.c +++ b/ext/mbstring/oniguruma/enc/euc_jp.c @@ -2,7 +2,7 @@ euc_jp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,7 @@ * SUCH DAMAGE. */ -#include "regenc.h" +#include "regint.h" #define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) @@ -51,18 +51,18 @@ static const int EncLen_EUCJP[] = { }; static int -eucjp_mbc_enc_len(const UChar* p) +mbc_enc_len(const UChar* p) { return EncLen_EUCJP[*p]; } static OnigCodePoint -eucjp_mbc_to_code(const UChar* p, const UChar* end) +mbc_to_code(const UChar* p, const UChar* end) { int c, i, len; OnigCodePoint n; - len = enc_len(ONIG_ENCODING_EUC_JP, p); + len = enclen(ONIG_ENCODING_EUC_JP, p); n = (OnigCodePoint )*p++; if (len == 1) return n; @@ -75,17 +75,18 @@ eucjp_mbc_to_code(const UChar* p, const UChar* end) } static int -eucjp_code_to_mbclen(OnigCodePoint code) +code_to_mbclen(OnigCodePoint code) { if (ONIGENC_IS_CODE_ASCII(code)) return 1; else if ((code & 0xff0000) != 0) return 3; else if ((code & 0xff00) != 0) return 2; - else return 0; + else + return ONIGERR_INVALID_CODE_POINT_VALUE; } #if 0 static int -eucjp_code_to_mbc_first(OnigCodePoint code) +code_to_mbc_first(OnigCodePoint code) { int first; @@ -103,7 +104,7 @@ eucjp_code_to_mbc_first(OnigCodePoint code) #endif static int -eucjp_code_to_mbc(OnigCodePoint code, UChar *buf) +code_to_mbc(OnigCodePoint code, UChar *buf) { UChar *p = buf; @@ -112,66 +113,38 @@ eucjp_code_to_mbc(OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) - return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; #endif return p - buf; } static int -eucjp_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { int len; const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); (*pp)++; return 1; } else { - len = enc_len(ONIG_ENCODING_EUC_JP, p); - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } + int i; + + len = enclen(ONIG_ENCODING_EUC_JP, p); + for (i = 0; i < len; i++) { + *lower++ = *p++; } (*pp) += len; return len; /* return byte length of converted char to lower */ } } -static int -eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end); -} - -static int -eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { - if ((ctype & (ONIGENC_CTYPE_WORD | - ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { - return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE); - } - } - - return FALSE; -} - static UChar* -eucjp_left_adjust_char_head(const UChar* start, const UChar* s) +left_adjust_char_head(const UChar* start, const UChar* s) { /* In this encoding mb-trail bytes doesn't mix with single bytes. @@ -183,14 +156,14 @@ eucjp_left_adjust_char_head(const UChar* start, const UChar* s) p = s; while (!eucjp_islead(*p) && p > start) p--; - len = enc_len(ONIG_ENCODING_EUC_JP, p); + len = enclen(ONIG_ENCODING_EUC_JP, p); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); } static int -eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end) +is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) { const UChar c = *s; if (c <= 0x7e || c == 0x8e || c == 0x8f) @@ -199,30 +172,114 @@ eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end) return FALSE; } + +static int PropertyInited = 0; +static const OnigCodePoint** PropertyList; +static int PropertyListNum; +static int PropertyListSize; +static hash_table_type* PropertyNameTable; + +static const OnigCodePoint CR_Hiragana[] = { + 1, + 0xa4a1, 0xa4f3 +}; /* CR_Hiragana */ + +static const OnigCodePoint CR_Katakana[] = { + 3, + 0xa5a1, 0xa5f6, + 0xaaa6, 0xaaaf, + 0xaab1, 0xaadd +}; /* CR_Katakana */ + +static int +init_property_list(void) +{ + int r; + + PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); + PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); + PropertyInited = 1; + + end: + return r; +} + +static int +property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + hash_data_type ctype; + + PROPERTY_LIST_INIT_CHECK; + + if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { + return onigenc_minimum_property_name_to_ctype(enc, p, end); + } + + return (int )ctype; +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (code_to_mbclen(code) > 1 ? TRUE : FALSE); + } + } + } + else { + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (unsigned int )PropertyListNum) + return ONIGERR_TYPE_BUG; + + return onig_is_in_code_range((UChar* )PropertyList[ctype], code); + } + + return FALSE; +} + +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[]) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + return ONIG_NO_SUPPORT_CONFIG; + } + else { + *sb_out = 0x80; + + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (OnigCtype )PropertyListNum) + return ONIGERR_TYPE_BUG; + + *ranges = PropertyList[ctype]; + return 0; + } +} + + OnigEncodingType OnigEncodingEUC_JP = { - eucjp_mbc_enc_len, + mbc_enc_len, "EUC-JP", /* name */ 3, /* max enc length */ 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, - eucjp_mbc_to_code, - eucjp_code_to_mbclen, - eucjp_code_to_mbc, - eucjp_mbc_to_normalize, - eucjp_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - eucjp_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - eucjp_left_adjust_char_head, - eucjp_is_allowed_reverse_match + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + property_name_to_ctype, + is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + is_allowed_reverse_match }; diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c index 57bf801536..1beef09001 100644 --- a/ext/mbstring/oniguruma/enc/euc_kr.c +++ b/ext/mbstring/oniguruma/enc/euc_kr.c @@ -2,7 +2,7 @@ euc_kr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -67,18 +67,21 @@ euckr_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -euckr_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) +euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower) { - return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_KR, flag, - pp, end, lower); + return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag, + pp, end, lower); } +#if 0 static int -euckr_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +euckr_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) { return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end); } +#endif static int euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -101,14 +104,14 @@ euckr_left_adjust_char_head(const UChar* start, const UChar* s) p = s; while (!euckr_islead(*p) && p > start) p--; - len = enc_len(ONIG_ENCODING_EUC_KR, p); + len = enclen(ONIG_ENCODING_EUC_KR, p); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); } static int -euckr_is_allowed_reverse_match(const UChar* s, const UChar* end) +euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) { const UChar c = *s; if (c <= 0x7e) return TRUE; @@ -120,23 +123,14 @@ OnigEncodingType OnigEncodingEUC_KR = { "EUC-KR", /* name */ 2, /* max enc length */ 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, euckr_mbc_to_code, onigenc_mb2_code_to_mbclen, euckr_code_to_mbc, - euckr_mbc_to_normalize, - euckr_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, + euckr_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, euckr_is_code_ctype, onigenc_not_support_get_ctype_code_range, euckr_left_adjust_char_head, @@ -149,23 +143,14 @@ OnigEncodingType OnigEncodingEUC_CN = { "EUC-CN", /* name */ 2, /* max enc length */ 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, euckr_mbc_to_code, onigenc_mb2_code_to_mbclen, euckr_code_to_mbc, - euckr_mbc_to_normalize, - euckr_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, + euckr_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, euckr_is_code_ctype, onigenc_not_support_get_ctype_code_range, euckr_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c index 6f396e75e6..2ddeb9318a 100644 --- a/ext/mbstring/oniguruma/enc/euc_tw.c +++ b/ext/mbstring/oniguruma/enc/euc_tw.c @@ -2,7 +2,7 @@ euc_tw.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -67,17 +67,11 @@ euctw_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -euctw_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) +euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower) { - return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_TW, flag, - pp, end, lower); -} - -static int -euctw_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_TW, flag, pp, end); + return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_TW, flag, + pp, end, lower); } static int @@ -86,7 +80,7 @@ euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype) return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype); } -#define euctw_islead(c) (((c) < 0xa1 && (c) != 0x8e) || (c) == 0xff) +#define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) static UChar* euctw_left_adjust_char_head(const UChar* start, const UChar* s) @@ -101,14 +95,14 @@ euctw_left_adjust_char_head(const UChar* start, const UChar* s) p = s; while (!euctw_islead(*p) && p > start) p--; - len = enc_len(ONIG_ENCODING_EUC_TW, p); + len = enclen(ONIG_ENCODING_EUC_TW, p); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); } static int -euctw_is_allowed_reverse_match(const UChar* s, const UChar* end) +euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) { const UChar c = *s; if (c <= 0x7e) return TRUE; @@ -120,23 +114,14 @@ OnigEncodingType OnigEncodingEUC_TW = { "EUC-TW", /* name */ 4, /* max enc length */ 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, euctw_mbc_to_code, onigenc_mb4_code_to_mbclen, euctw_code_to_mbc, - euctw_mbc_to_normalize, - euctw_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, + euctw_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, euctw_is_code_ctype, onigenc_not_support_get_ctype_code_range, euctw_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/gb18030.c b/ext/mbstring/oniguruma/enc/gb18030.c index 01995ea094..6bbd109eab 100644 --- a/ext/mbstring/oniguruma/enc/gb18030.c +++ b/ext/mbstring/oniguruma/enc/gb18030.c @@ -2,8 +2,8 @@ gb18030.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2005 KUBO Takehiro - * K.Kosako + * Copyright (c) 2005-2007 KUBO Takehiro + * K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -88,18 +88,21 @@ gb18030_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -gb18030_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) +gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower) { - return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_GB18030, flag, - pp, end, lower); + return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_GB18030, flag, + pp, end, lower); } +#if 0 static int -gb18030_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +gb18030_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) { return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end); } +#endif static int gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -467,7 +470,7 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) } static int -gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end) +gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) { return GB18030_MAP[*s] == C1 ? TRUE : FALSE; } @@ -477,23 +480,14 @@ OnigEncodingType OnigEncodingGB18030 = { "GB18030", /* name */ 4, /* max enc length */ 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, gb18030_mbc_to_code, onigenc_mb4_code_to_mbclen, gb18030_code_to_mbc, - gb18030_mbc_to_normalize, - gb18030_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, + gb18030_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, gb18030_is_code_ctype, onigenc_not_support_get_ctype_code_range, gb18030_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c index 5646f26c10..174b97f026 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_1.c +++ b/ext/mbstring/oniguruma/enc/iso8859_1.c @@ -2,7 +2,7 @@ iso8859_1.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,90 +30,221 @@ #include "regenc.h" #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ - ((EncISO_8859_1_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const unsigned short EncISO_8859_1_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } }; static int -iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - const UChar* p = *pp; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + if (*p == 0x53 && end > p + 1 + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; } - else { - *lower = *p; + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + if (*p == 0x73 && end > p + 1 + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; } - (*pp)++; - return 1; /* return byte length of converted char to lower */ + else if (0xc0 <= *p && *p <= 0xcf) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + else if (0xd0 <= *p && *p <= 0xdf) { + if (*p == 0xdf) { + items[0].byte_len = 1; + items[0].code_len = 2; + items[0].code[0] = (OnigCodePoint )'s'; + items[0].code[1] = (OnigCodePoint )'s'; + + items[1].byte_len = 1; + items[1].code_len = 2; + items[1].code[0] = (OnigCodePoint )'S'; + items[1].code[1] = (OnigCodePoint )'S'; + + items[2].byte_len = 1; + items[2].code_len = 2; + items[2].code[0] = (OnigCodePoint )'s'; + items[2].code[1] = (OnigCodePoint )'S'; + + items[3].byte_len = 1; + items[3].code_len = 2; + items[3].code[0] = (OnigCodePoint )'S'; + items[3].code[1] = (OnigCodePoint )'s'; + + return 4; + } + else if (*p != 0xd7) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + } + else if (0xe0 <= *p && *p <= 0xef) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + else if (0xf0 <= *p && *p <= 0xfe) { + if (*p != 0xf7) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + } + + return 0; } static int -iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_1_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba)) - return FALSE; - else - return TRUE; - } + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; - return (v != 0 ? TRUE : FALSE); + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; } - return FALSE; + + (*pp)++; + v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p >= 0xaa && *p <= 0xba) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); } +#endif static int -iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_1_CTYPE(code, ctype); @@ -126,25 +257,15 @@ OnigEncodingType OnigEncodingISO_8859_1 = { "ISO-8859-1", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_1_mbc_to_normalize, - iso_8859_1_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_1_is_code_ctype, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c index 8081ef8010..e35c19d78f 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_10.c +++ b/ext/mbstring/oniguruma/enc/iso8859_10.c @@ -2,7 +2,7 @@ iso8859_10.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ #define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \ - ((EncISO_8859_10_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_10_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,88 +69,82 @@ static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_10_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 + 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 }; static int -iso_8859_10_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p); - } - else { - *lower = *p; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; } + + *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p); (*pp)++; - return 1; /* return byte length of converted char to lower */ + return 1; } +#if 0 static int -iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_10_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf is lower case letter, but can't convert. */ - if (*p == 0xdf) - return FALSE; - else - return TRUE; - } + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } - return (v != 0 ? TRUE : FALSE); + (*pp)++; + v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif static int -iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_10_CTYPE(code, ctype); @@ -158,116 +152,71 @@ iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa2, 0xb2 }, + { 0xa3, 0xb3 }, + { 0xa4, 0xb4 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa8, 0xb8 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + static int -iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xb1 }, - { 0xa2, 0xb2 }, - { 0xa3, 0xb3 }, - { 0xa4, 0xb4 }, - { 0xa5, 0xb5 }, - { 0xa6, 0xb6 }, - { 0xa8, 0xb8 }, - { 0xa9, 0xb9 }, - { 0xaa, 0xba }, - { 0xab, 0xbb }, - { 0xac, 0xbc }, - { 0xae, 0xbe }, - { 0xaf, 0xbf }, - - { 0xb1, 0xa1 }, - { 0xb2, 0xa2 }, - { 0xb3, 0xa3 }, - { 0xb4, 0xa4 }, - { 0xb5, 0xa5 }, - { 0xb6, 0xa6 }, - { 0xb8, 0xa8 }, - { 0xb9, 0xa9 }, - { 0xba, 0xaa }, - { 0xbb, 0xab }, - { 0xbc, 0xac }, - { 0xbe, 0xae }, - { 0xbf, 0xaf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); } OnigEncodingType OnigEncodingISO_8859_10 = { @@ -275,25 +224,15 @@ OnigEncodingType OnigEncodingISO_8859_10 = { "ISO-8859-10", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_10_mbc_to_normalize, - iso_8859_10_is_mbc_ambiguous, - iso_8859_10_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_10_is_code_ctype, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c index de9bb3b825..8a460a3047 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_11.c +++ b/ext/mbstring/oniguruma/enc/iso8859_11.c @@ -2,7 +2,7 @@ iso8859_11.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,45 +30,45 @@ #include "regenc.h" #define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ - ((EncISO_8859_11_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_11_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const unsigned short EncISO_8859_11_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000 + 0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000 }; static int -iso_8859_11_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_11_CTYPE(code, ctype); @@ -81,24 +81,15 @@ OnigEncodingType OnigEncodingISO_8859_11 = { "ISO-8859-11", /* name */ 1, /* max enc length */ 1, /* min enc length */ - ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_to_normalize, - onigenc_ascii_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_11_is_code_ctype, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c index 69316edfc3..3670d92ea5 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_13.c +++ b/ext/mbstring/oniguruma/enc/iso8859_13.c @@ -31,7 +31,7 @@ #define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \ - ((EncISO_8859_13_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_13_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_13_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,84 +69,83 @@ static const UChar EncISO_8859_13_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_13_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x14a2, 0x00a0, 0x14a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x14a2, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x10e2, 0x00a0, 0x01a0, - 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0 + 0x34a2, 0x00a0, 0x34a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x34a2, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x30e2, 0x00a0, 0x01a0, + 0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0 }; static int -mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p); - } - else { - *lower = *p; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; } + + *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p); (*pp)++; - return 1; /* return byte length of converted char to lower */ + return 1; } +#if 0 static int -is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_13_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf, 0xb5 are lower case letter, but can't convert. */ - if (*p == 0xdf || *p == 0xb5) - return FALSE; - else - return TRUE; - } + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } - return (v != 0 ? TRUE : FALSE); + (*pp)++; + v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf, 0xb5 are lower case letter, but can't convert. */ + if (*p == 0xb5) + return FALSE; + else + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif static int is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -157,85 +156,56 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + static int -get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); } OnigEncodingType OnigEncodingISO_8859_13 = { @@ -243,24 +213,14 @@ OnigEncodingType OnigEncodingISO_8859_13 = { "ISO-8859-13", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - mbc_to_normalize, - is_mbc_ambiguous, - get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c index 44638cf13a..3596d4479a 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_14.c +++ b/ext/mbstring/oniguruma/enc/iso8859_14.c @@ -31,7 +31,7 @@ #define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \ - ((EncISO_8859_14_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_14_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_14_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,84 +69,80 @@ static const UChar EncISO_8859_14_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_14_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x10e2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x00a0, - 0x14a2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x14a2, - 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x00a0, 0x14a2, - 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 + 0x0284, 0x34a2, 0x30e2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x00a0, + 0x34a2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x34a2, + 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x00a0, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 }; static int -mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p); - } - else { - *lower = *p; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; } + + *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p); (*pp)++; return 1; /* return byte length of converted char to lower */ } +#if 0 static int -is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_14_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf is lower case letter, but can't convert. */ - if (*p == 0xdf) - return FALSE; - else - return TRUE; - } + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } - return (v != 0 ? TRUE : FALSE); + (*pp)++; + v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif static int is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -157,115 +153,72 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xa2 }, + { 0xa4, 0xa5 }, + { 0xa6, 0xab }, + { 0xa8, 0xb8 }, + { 0xaa, 0xba }, + { 0xac, 0xbc }, + { 0xaf, 0xff }, + + { 0xb0, 0xb1 }, + { 0xb2, 0xb3 }, + { 0xb4, 0xb5 }, + { 0xb7, 0xb9 }, + { 0xbb, 0xbf }, + { 0xbd, 0xbe }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + static int -get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xa2 }, - { 0xa2, 0xa1 }, - { 0xa4, 0xa5 }, - { 0xa5, 0xa4 }, - { 0xa6, 0xab }, - { 0xa8, 0xb8 }, - { 0xaa, 0xba }, - { 0xab, 0xa6 }, - { 0xac, 0xbc }, - { 0xaf, 0xff }, - - { 0xb0, 0xb1 }, - { 0xb1, 0xb0 }, - { 0xb2, 0xb3 }, - { 0xb3, 0xb2 }, - { 0xb4, 0xb5 }, - { 0xb5, 0xb4 }, - { 0xb7, 0xb9 }, - { 0xb8, 0xa8 }, - { 0xb9, 0xb7 }, - { 0xba, 0xaa }, - { 0xbb, 0xbf }, - { 0xbc, 0xac }, - { 0xbd, 0xbe }, - { 0xbe, 0xbd }, - { 0xbf, 0xbb }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xaf } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); } OnigEncodingType OnigEncodingISO_8859_14 = { @@ -273,24 +226,14 @@ OnigEncodingType OnigEncodingISO_8859_14 = { "ISO-8859-14", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - mbc_to_normalize, - is_mbc_ambiguous, - get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c index f643b895df..08492fb4d9 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_15.c +++ b/ext/mbstring/oniguruma/enc/iso8859_15.c @@ -31,7 +31,7 @@ #define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \ - ((EncISO_8859_15_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_15_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_15_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,84 +69,84 @@ static const UChar EncISO_8859_15_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_15_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, - 0x10e2, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x14a2, 0x10e2, 0x00a0, 0x01a0, - 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, + 0x30e2, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x34a2, 0x30e2, 0x00a0, 0x01a0, + 0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 }; static int -mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p); - } - else { - *lower = *p; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; } + + *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p); (*pp)++; return 1; /* return byte length of converted char to lower */ } +#if 0 static int -is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_15_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf etc.. are lower case letter, but can't convert. */ - if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba) - return FALSE; - else - return TRUE; - } + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } - return (v != 0 ? TRUE : FALSE); + (*pp)++; + v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf etc.. are lower case letter, but can't convert. */ + if (*p == 0xaa || *p == 0xb5 || *p == 0xba) + return FALSE; + else + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif static int is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -157,96 +157,62 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa6, 0xa8 }, + + { 0xb4, 0xb8 }, + { 0xbc, 0xbd }, + { 0xbe, 0xff }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + static int -get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xa6, 0xa8 }, - { 0xa8, 0xa6 }, - - { 0xb4, 0xb8 }, - { 0xb8, 0xb4 }, - { 0xbc, 0xbd }, - { 0xbd, 0xbc }, - { 0xbe, 0xff }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xbe } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); } OnigEncodingType OnigEncodingISO_8859_15 = { @@ -254,24 +220,14 @@ OnigEncodingType OnigEncodingISO_8859_15 = { "ISO-8859-15", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - mbc_to_normalize, - is_mbc_ambiguous, - get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c index 921ae36d9d..8b39c58a6b 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_16.c +++ b/ext/mbstring/oniguruma/enc/iso8859_16.c @@ -31,7 +31,7 @@ #define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \ - ((EncISO_8859_16_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_16_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_16_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,84 +69,79 @@ static const UChar EncISO_8859_16_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_16_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x01a0, 0x14a2, 0x00a0, - 0x10e2, 0x00a0, 0x14a2, 0x01a0, 0x14a2, 0x01a0, 0x10e2, 0x14a2, - 0x00a0, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x01a0, - 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 + 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0, + 0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2, + 0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0, + 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 }; static int -mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p); - } - else { - *lower = *p; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; } + + *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p); (*pp)++; return 1; /* return byte length of converted char to lower */ } +#if 0 static int -is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_16_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf is lower case letter, but can't convert. */ - if (*p == 0xdf) - return FALSE; - else - return TRUE; - } + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } - return (v != 0 ? TRUE : FALSE); + (*pp)++; + v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif static int is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -157,109 +152,69 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xa2 }, + { 0xa3, 0xb3 }, + { 0xa6, 0xa8 }, + { 0xaa, 0xba }, + { 0xac, 0xae }, + { 0xaf, 0xbf }, + + { 0xb2, 0xb9 }, + { 0xb4, 0xb8 }, + { 0xbc, 0xbd }, + { 0xbe, 0xff }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + static int -get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xa2 }, - { 0xa2, 0xa1 }, - { 0xa3, 0xb3 }, - { 0xa6, 0xa8 }, - { 0xa8, 0xa6 }, - { 0xaa, 0xba }, - { 0xac, 0xae }, - { 0xae, 0xac }, - { 0xaf, 0xbf }, - - { 0xb2, 0xb9 }, - { 0xb3, 0xa3 }, - { 0xb4, 0xb8 }, - { 0xb8, 0xb4 }, - { 0xb9, 0xb2 }, - { 0xba, 0xaa }, - { 0xbc, 0xbd }, - { 0xbd, 0xbc }, - { 0xbe, 0xff }, - { 0xbf, 0xaf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xbe } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); } OnigEncodingType OnigEncodingISO_8859_16 = { @@ -267,24 +222,14 @@ OnigEncodingType OnigEncodingISO_8859_16 = { "ISO-8859-16", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - mbc_to_normalize, - is_mbc_ambiguous, - get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c index f8cb3756f2..80b93ba1ba 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_2.c +++ b/ext/mbstring/oniguruma/enc/iso8859_2.c @@ -2,7 +2,7 @@ iso8859_2.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ #define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ - ((EncISO_8859_2_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_2_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,192 +69,145 @@ static const UChar EncISO_8859_2_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_2_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x00a0, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0, - 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2, - 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0 + 0x0284, 0x34a2, 0x00a0, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0 }; static int -iso_8859_2_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p); - } - else { - *lower = *p; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; } + + *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p); (*pp)++; return 1; /* return byte length of converted char to lower */ } +#if 0 static int -iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_2_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf is lower case letter, but can't convert. */ - if (*p == 0xdf) - return FALSE; - else - return TRUE; - } + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } - return (v != 0 ? TRUE : FALSE); + (*pp)++; + v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa3, 0xb3 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; static int -iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xb1 }, - { 0xa3, 0xb3 }, - { 0xa5, 0xb5 }, - { 0xa6, 0xb6 }, - { 0xa9, 0xb9 }, - { 0xaa, 0xba }, - { 0xab, 0xbb }, - { 0xac, 0xbc }, - { 0xae, 0xbe }, - { 0xaf, 0xbf }, - - { 0xb1, 0xa1 }, - { 0xb3, 0xa3 }, - { 0xb5, 0xa5 }, - { 0xb6, 0xa6 }, - { 0xb9, 0xa9 }, - { 0xba, 0xaa }, - { 0xbb, 0xab }, - { 0xbc, 0xac }, - { 0xbe, 0xae }, - { 0xbf, 0xaf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); } static int -iso_8859_2_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_2_CTYPE(code, ctype); @@ -267,25 +220,15 @@ OnigEncodingType OnigEncodingISO_8859_2 = { "ISO-8859-2", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_2_mbc_to_normalize, - iso_8859_2_is_mbc_ambiguous, - iso_8859_2_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_2_is_code_ctype, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c index e62d20de7b..fd1168c381 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_3.c +++ b/ext/mbstring/oniguruma/enc/iso8859_3.c @@ -2,7 +2,7 @@ iso8859_3.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ #define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \ - ((EncISO_8859_3_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_3_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,88 +69,86 @@ static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_3_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x14a2, 0x00a0, - 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x0000, 0x14a2, - 0x00a0, 0x10e2, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x10e2, 0x01a0, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x11a0, 0x0000, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0 + 0x0284, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x0000, 0x34a2, + 0x00a0, 0x30e2, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x30e2, 0x01a0, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x11a0, 0x0000, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0 }; static int -iso_8859_3_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p); - } - else { - *lower = *p; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; } + + *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p); (*pp)++; - return 1; /* return byte length of converted char to lower */ + return 1; } +#if 0 static int -iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_3_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (*p == 0xdf || *p == 0xb5) - return FALSE; - else - return TRUE; - } + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } - return (v != 0 ? TRUE : FALSE); + (*pp)++; + v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p == 0xb5) + return FALSE; + else + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif static int -iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_3_CTYPE(code, ctype); @@ -158,97 +156,63 @@ iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } -static int -iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xb1 }, - { 0xa6, 0xb6 }, - { 0xa9, 0xb9 }, - { 0xaa, 0xba }, - { 0xab, 0xbb }, - { 0xac, 0xbc }, - { 0xaf, 0xbf }, - { 0xb1, 0xa1 }, - { 0xb6, 0xa6 }, - { 0xb9, 0xa9 }, - { 0xba, 0xaa }, - { 0xbb, 0xab }, - { 0xbc, 0xac }, - { 0xbf, 0xaf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); } OnigEncodingType OnigEncodingISO_8859_3 = { @@ -256,25 +220,15 @@ OnigEncodingType OnigEncodingISO_8859_3 = { "ISO-8859-3", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_3_mbc_to_normalize, - iso_8859_3_is_mbc_ambiguous, - iso_8859_3_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_3_is_code_ctype, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c index dd6bd7dfe3..c124f5653d 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_4.c +++ b/ext/mbstring/oniguruma/enc/iso8859_4.c @@ -2,7 +2,7 @@ iso8859_4.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ #define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \ - ((EncISO_8859_4_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_4_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,88 +69,85 @@ static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_4_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0, - 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x00a0, - 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0 + 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x00a0, + 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0 }; static int -iso_8859_4_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p); - } - else { - *lower = *p; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; } + + *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p); (*pp)++; return 1; /* return byte length of converted char to lower */ } +#if 0 static int -iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_4_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (*p == 0xdf || *p == 0xa2) - return FALSE; - else - return TRUE; - } + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } - return (v != 0 ? TRUE : FALSE); + (*pp)++; + v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + if (*p == 0xa2) + return FALSE; + else + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif static int -iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_4_CTYPE(code, ctype); @@ -158,106 +155,66 @@ iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa3, 0xb3 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + static int -iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xb1 }, - { 0xa3, 0xb3 }, - { 0xa5, 0xb5 }, - { 0xa6, 0xb6 }, - { 0xa9, 0xb9 }, - { 0xaa, 0xba }, - { 0xab, 0xbb }, - { 0xac, 0xbc }, - { 0xae, 0xbe }, - - { 0xb1, 0xa1 }, - { 0xb3, 0xa3 }, - { 0xb5, 0xa5 }, - { 0xb6, 0xa6 }, - { 0xb9, 0xa9 }, - { 0xba, 0xaa }, - { 0xbb, 0xab }, - { 0xbc, 0xac }, - { 0xbe, 0xae }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); } OnigEncodingType OnigEncodingISO_8859_4 = { @@ -265,25 +222,15 @@ OnigEncodingType OnigEncodingISO_8859_4 = { "ISO-8859-4", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_4_mbc_to_normalize, - iso_8859_4_is_mbc_ambiguous, - iso_8859_4_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_4_is_code_ctype, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c index 87b7fb8a29..1ca67e735f 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_5.c +++ b/ext/mbstring/oniguruma/enc/iso8859_5.c @@ -2,7 +2,7 @@ iso8859_5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ #define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \ - ((EncISO_8859_5_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_5_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,80 +69,66 @@ static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_5_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2 + 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2 }; static int -iso_8859_5_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - + *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p); (*pp)++; - return 1; /* return byte length of converted char to lower */ + return 1; } +#if 0 static int -iso_8859_5_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_5_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - return (v != 0 ? TRUE : FALSE); - } - return FALSE; + v = (EncISO_8859_5_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ? TRUE : FALSE); } +#endif static int -iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_5_CTYPE(code, ctype); @@ -150,120 +136,74 @@ iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xf1 }, + { 0xa2, 0xf2 }, + { 0xa3, 0xf3 }, + { 0xa4, 0xf4 }, + { 0xa5, 0xf5 }, + { 0xa6, 0xf6 }, + { 0xa7, 0xf7 }, + { 0xa8, 0xf8 }, + { 0xa9, 0xf9 }, + { 0xaa, 0xfa }, + { 0xab, 0xfb }, + { 0xac, 0xfc }, + { 0xae, 0xfe }, + { 0xaf, 0xff }, + + { 0xb0, 0xd0 }, + { 0xb1, 0xd1 }, + { 0xb2, 0xd2 }, + { 0xb3, 0xd3 }, + { 0xb4, 0xd4 }, + { 0xb5, 0xd5 }, + { 0xb6, 0xd6 }, + { 0xb7, 0xd7 }, + { 0xb8, 0xd8 }, + { 0xb9, 0xd9 }, + { 0xba, 0xda }, + { 0xbb, 0xdb }, + { 0xbc, 0xdc }, + { 0xbd, 0xdd }, + { 0xbe, 0xde }, + { 0xbf, 0xdf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef } +}; + static int -iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xf1 }, - { 0xa2, 0xf2 }, - { 0xa3, 0xf3 }, - { 0xa4, 0xf4 }, - { 0xa5, 0xf5 }, - { 0xa6, 0xf6 }, - { 0xa7, 0xf7 }, - { 0xa8, 0xf8 }, - { 0xa9, 0xf9 }, - { 0xaa, 0xfa }, - { 0xab, 0xfb }, - { 0xac, 0xfc }, - { 0xae, 0xfe }, - { 0xaf, 0xff }, - - { 0xb0, 0xd0 }, - { 0xb1, 0xd1 }, - { 0xb2, 0xd2 }, - { 0xb3, 0xd3 }, - { 0xb4, 0xd4 }, - { 0xb5, 0xd5 }, - { 0xb6, 0xd6 }, - { 0xb7, 0xd7 }, - { 0xb8, 0xd8 }, - { 0xb9, 0xd9 }, - { 0xba, 0xda }, - { 0xbb, 0xdb }, - { 0xbc, 0xdc }, - { 0xbd, 0xdd }, - { 0xbe, 0xdf }, - { 0xbf, 0xdf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xb0 }, - { 0xd1, 0xb1 }, - { 0xd2, 0xb2 }, - { 0xd3, 0xb3 }, - { 0xd4, 0xb4 }, - { 0xd5, 0xb5 }, - { 0xd6, 0xb6 }, - { 0xd7, 0xb7 }, - { 0xd8, 0xb8 }, - { 0xd9, 0xb9 }, - { 0xda, 0xba }, - { 0xdb, 0xbb }, - { 0xdc, 0xbc }, - { 0xdd, 0xbd }, - { 0xde, 0xbe }, - { 0xdf, 0xbf }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf1, 0xa1 }, - { 0xf2, 0xa2 }, - { 0xf3, 0xa3 }, - { 0xf4, 0xa4 }, - { 0xf5, 0xa5 }, - { 0xf6, 0xa6 }, - { 0xf7, 0xa7 }, - { 0xf8, 0xa8 }, - { 0xf9, 0xa9 }, - { 0xfa, 0xaa }, - { 0xfb, 0xab }, - { 0xfc, 0xac }, - { 0xfe, 0xae }, - { 0xff, 0xaf } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); } OnigEncodingType OnigEncodingISO_8859_5 = { @@ -271,25 +211,15 @@ OnigEncodingType OnigEncodingISO_8859_5 = { "ISO-8859-5", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_5_mbc_to_normalize, - iso_8859_5_is_mbc_ambiguous, - iso_8859_5_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_5_is_code_ctype, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c index fffcd0e7d1..ab42eeed31 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_6.c +++ b/ext/mbstring/oniguruma/enc/iso8859_6.c @@ -2,7 +2,7 @@ iso8859_6.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,25 +30,25 @@ #include "regenc.h" #define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ - ((EncISO_8859_6_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const unsigned short EncISO_8859_6_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -57,18 +57,18 @@ static const unsigned short EncISO_8859_6_CtypeTable[256] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0, - 0x0000, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; static int -iso_8859_6_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_6_CTYPE(code, ctype); @@ -81,24 +81,15 @@ OnigEncodingType OnigEncodingISO_8859_6 = { "ISO-8859-6", /* name */ 1, /* max enc length */ 1, /* min enc length */ - ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_to_normalize, - onigenc_ascii_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_6_is_code_ctype, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c index e87661d84b..1090064d74 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_7.c +++ b/ext/mbstring/oniguruma/enc/iso8859_7.c @@ -2,7 +2,7 @@ iso8859_7.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ #define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ - ((EncISO_8859_7_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,87 +69,74 @@ static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_7_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x14a2, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x10a0, 0x14a2, 0x14a2, - 0x10e2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x0000 + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2, + 0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000 }; static int -iso_8859_7_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } + *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p); (*pp)++; - return 1; /* return byte length of converted char to lower */ + return 1; } +#if 0 static int -iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_7_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - if (*p == 0xc0 || *p == 0xe0) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); + v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + if (*p == 0xc0 || *p == 0xe0) + return FALSE; + else + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif static int -iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_7_CTYPE(code, ctype); @@ -157,121 +144,78 @@ iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xb6, 0xdc }, + { 0xb8, 0xdd }, + { 0xb9, 0xde }, + { 0xba, 0xdf }, + { 0xbc, 0xfc }, + { 0xbe, 0xfd }, + { 0xbf, 0xfe }, + + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb } +}; + static int -iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xb6, 0xdc }, - { 0xb8, 0xdd }, - { 0xb9, 0xde }, - { 0xba, 0xdf }, - { 0xbc, 0xfc }, - { 0xbe, 0xfd }, - { 0xbf, 0xfe }, - - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xb6 }, - { 0xdd, 0xb8 }, - { 0xde, 0xb9 }, - { 0xdf, 0xba }, - - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xbc }, - { 0xfd, 0xbe }, - { 0xfe, 0xbf } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); } + OnigEncodingType OnigEncodingISO_8859_7 = { onigenc_single_byte_mbc_enc_len, "ISO-8859-7", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_7_mbc_to_normalize, - iso_8859_7_is_mbc_ambiguous, - iso_8859_7_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_7_is_code_ctype, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c index e76966c667..fb9846f25f 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_8.c +++ b/ext/mbstring/oniguruma/enc/iso8859_8.c @@ -2,7 +2,7 @@ iso8859_8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,45 +30,45 @@ #include "regenc.h" #define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ - ((EncISO_8859_8_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const unsigned short EncISO_8859_8_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; static int -iso_8859_8_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_8_CTYPE(code, ctype); @@ -81,24 +81,15 @@ OnigEncodingType OnigEncodingISO_8859_8 = { "ISO-8859-8", /* name */ 1, /* max enc length */ 1, /* min enc length */ - ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_to_normalize, - onigenc_ascii_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_8_is_code_ctype, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c index 16a30c5f24..079d681c21 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_9.c +++ b/ext/mbstring/oniguruma/enc/iso8859_9.c @@ -2,7 +2,7 @@ iso8859_9.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ #define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \ - ((EncISO_8859_9_CtypeTable[code] & ctype) != 0) + ((EncISO_8859_9_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,88 +69,86 @@ static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { }; static const unsigned short EncISO_8859_9_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 }; static int -iso_8859_9_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p); - } - else { - *lower = *p; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; } + + *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p); (*pp)++; - return 1; /* return byte length of converted char to lower */ + return 1; } +#if 0 static int -iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_9_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf etc.. are lower case letter, but can't convert. */ - if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba)) - return FALSE; - else - return TRUE; - } + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } - return (v != 0 ? TRUE : FALSE); + (*pp)++; + v = (EncISO_8859_9_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf etc.. are lower case letter, but can't convert. */ + if (*p >= 0xaa && *p <= 0xba) + return FALSE; + else + return TRUE; } - return FALSE; + + return (v != 0 ? TRUE : FALSE); } +#endif static int -iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_9_CTYPE(code, ctype); @@ -158,86 +156,56 @@ iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + static int -iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); } OnigEncodingType OnigEncodingISO_8859_9 = { @@ -245,25 +213,15 @@ OnigEncodingType OnigEncodingISO_8859_9 = { "ISO-8859-9", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_9_mbc_to_normalize, - iso_8859_9_is_mbc_ambiguous, - iso_8859_9_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_9_is_code_ctype, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c index d7277e862e..c6649572f5 100644 --- a/ext/mbstring/oniguruma/enc/koi8.c +++ b/ext/mbstring/oniguruma/enc/koi8.c @@ -2,7 +2,7 @@ koi8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ #define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c] #define ENC_IS_KOI8_CTYPE(code,ctype) \ - ((EncKOI8_CtypeTable[code] & ctype) != 0) + ((EncKOI8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncKOI8_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,22 +69,22 @@ static const UChar EncKOI8_ToLowerCaseTable[256] = { }; static const unsigned short EncKOI8_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -93,52 +93,46 @@ static const unsigned short EncKOI8_CtypeTable[256] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2 + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 }; + static int -koi8_mbc_to_normalize(OnigAmbigType flag, - const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower) +koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { - const OnigUChar* p = *pp; + const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_KOI8_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } + *lower = ENC_KOI8_TO_LOWER_CASE(*p); (*pp)++; - return 1; /* return byte length of converted char to lower */ + return 1; } +#if 0 static int koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end) { const OnigUChar* p = *pp; (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + if (((flag & ONIGENC_CASE_FOLD_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + ((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) != 0 && !ONIGENC_IS_MBC_ASCII(p))) { int v = (EncKOI8_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); return (v != 0 ? TRUE : FALSE); } return FALSE; } - +#endif static int koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -149,89 +143,91 @@ koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfe, 0xde }, + { 0xff, 0xdf } +}; + static int -koi8_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +koi8_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - { 0xdf, 0xff }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfe, 0xde }, - { 0xff, 0xdf } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +koi8_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); } OnigEncodingType OnigEncodingKOI8 = { @@ -239,24 +235,14 @@ OnigEncodingType OnigEncodingKOI8 = { "KOI8", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - koi8_mbc_to_normalize, - koi8_is_mbc_ambiguous, - koi8_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, + koi8_mbc_case_fold, + koi8_apply_all_case_fold, + koi8_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, koi8_is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c index 1010f5ff93..364dda1516 100644 --- a/ext/mbstring/oniguruma/enc/koi8_r.c +++ b/ext/mbstring/oniguruma/enc/koi8_r.c @@ -2,7 +2,7 @@ koi8_r.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ #define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c] #define ENC_IS_KOI8_R_CTYPE(code,ctype) \ - ((EncKOI8_R_CtypeTable[code] & ctype) != 0) + ((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) static const UChar EncKOI8_R_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -69,75 +69,63 @@ static const UChar EncKOI8_R_ToLowerCaseTable[256] = { }; static const unsigned short EncKOI8_R_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0, - 0x00a0, 0x00a0, 0x00a0, 0x10e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2 + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 }; static int -koi8_r_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_KOI8_R_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } + *lower = ENC_KOI8_R_TO_LOWER_CASE(*p); (*pp)++; - return 1; /* return byte length of converted char to lower */ + return 1; } +#if 0 static int -koi8_r_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { + int v; const UChar* p = *pp; (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncKOI8_R_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - return (v != 0 ? TRUE : FALSE); - } - return FALSE; + v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ? TRUE : FALSE); } +#endif static int koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -148,92 +136,60 @@ koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) return FALSE; } +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa3, 0xb3 }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff } +}; + static int -koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +koi8_r_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigPairAmbigCodes cc[] = { - { 0xa3, 0xb3 }, - { 0xb3, 0xa3 }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - { 0xdf, 0xff }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfe, 0xde }, - { 0xff, 0xdf } - }; + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; +static int +koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); } OnigEncodingType OnigEncodingKOI8_R = { @@ -241,24 +197,14 @@ OnigEncodingType OnigEncodingKOI8_R = { "KOI8-R", /* name */ 1, /* max enc length */ 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - koi8_r_mbc_to_normalize, - koi8_r_is_mbc_ambiguous, - koi8_r_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, + koi8_r_mbc_case_fold, + koi8_r_apply_all_case_fold, + koi8_r_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, koi8_r_is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, diff --git a/ext/mbstring/oniguruma/enc/mktable.c b/ext/mbstring/oniguruma/enc/mktable.c index fcf057423c..285216ebda 100644 --- a/ext/mbstring/oniguruma/enc/mktable.c +++ b/ext/mbstring/oniguruma/enc/mktable.c @@ -2,7 +2,7 @@ mktable.c **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,28 +29,32 @@ #include #include +#include + +#define __USE_ISOC99 +#include -#define NOT_RUBY #include "regenc.h" -#define UNICODE_ISO_8859_1 0 -#define ISO_8859_1 1 -#define ISO_8859_2 2 -#define ISO_8859_3 3 -#define ISO_8859_4 4 -#define ISO_8859_5 5 -#define ISO_8859_6 6 -#define ISO_8859_7 7 -#define ISO_8859_8 8 -#define ISO_8859_9 9 -#define ISO_8859_10 10 -#define ISO_8859_11 11 -#define ISO_8859_13 12 -#define ISO_8859_14 13 -#define ISO_8859_15 14 -#define ISO_8859_16 15 -#define KOI8 16 -#define KOI8_R 17 +#define ASCII 0 +#define UNICODE_ISO_8859_1 1 +#define ISO_8859_1 2 +#define ISO_8859_2 3 +#define ISO_8859_3 4 +#define ISO_8859_4 5 +#define ISO_8859_5 6 +#define ISO_8859_6 7 +#define ISO_8859_7 8 +#define ISO_8859_8 9 +#define ISO_8859_9 10 +#define ISO_8859_10 11 +#define ISO_8859_11 12 +#define ISO_8859_13 13 +#define ISO_8859_14 14 +#define ISO_8859_15 15 +#define ISO_8859_16 16 +#define KOI8 17 +#define KOI8_R 18 typedef struct { int num; @@ -58,6 +62,7 @@ typedef struct { } ENC_INFO; static ENC_INFO Info[] = { + { ASCII, "ASCII" }, { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" }, { ISO_8859_1, "ISO_8859_1" }, { ISO_8859_2, "ISO_8859_2" }, @@ -81,6 +86,9 @@ static ENC_INFO Info[] = { static int IsAlpha(int enc, int c) { + if (enc == ASCII) + return isalpha(c); + if (c >= 0x41 && c <= 0x5a) return 1; if (c >= 0x61 && c <= 0x7a) return 1; @@ -255,6 +263,9 @@ static int IsAlpha(int enc, int c) static int IsBlank(int enc, int c) { + if (enc == ASCII) + return isblank(c); + if (c == 0x09 || c == 0x20) return 1; switch (enc) { @@ -291,6 +302,9 @@ static int IsBlank(int enc, int c) static int IsCntrl(int enc, int c) { + if (enc == ASCII) + return iscntrl(c); + if (c >= 0x00 && c <= 0x1F) return 1; switch (enc) { @@ -328,7 +342,7 @@ static int IsCntrl(int enc, int c) return 0; } -static int IsDigit(int enc, int c) +static int IsDigit(int enc ARG_UNUSED, int c) { if (c >= 0x30 && c <= 0x39) return 1; return 0; @@ -336,6 +350,9 @@ static int IsDigit(int enc, int c) static int IsGraph(int enc, int c) { + if (enc == ASCII) + return isgraph(c); + if (c >= 0x21 && c <= 0x7e) return 1; switch (enc) { @@ -405,6 +422,9 @@ static int IsGraph(int enc, int c) static int IsLower(int enc, int c) { + if (enc == ASCII) + return islower(c); + if (c >= 0x61 && c <= 0x7a) return 1; switch (enc) { @@ -534,6 +554,9 @@ static int IsLower(int enc, int c) static int IsPrint(int enc, int c) { + if (enc == ASCII) + return isprint(c); + if (c >= 0x20 && c <= 0x7e) return 1; switch (enc) { @@ -609,6 +632,9 @@ static int IsPrint(int enc, int c) static int IsPunct(int enc, int c) { + if (enc == ASCII) + return ispunct(c); + if (enc == UNICODE_ISO_8859_1) { if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 || c == 0x7c || c == 0x7e) return 1; @@ -705,6 +731,9 @@ static int IsPunct(int enc, int c) static int IsSpace(int enc, int c) { + if (enc == ASCII) + return isspace(c); + if (c >= 0x09 && c <= 0x0d) return 1; if (c == 0x20) return 1; @@ -744,6 +773,9 @@ static int IsSpace(int enc, int c) static int IsUpper(int enc, int c) { + if (enc == ASCII) + return isupper(c); + if (c >= 0x41 && c <= 0x5a) return 1; switch (enc) { @@ -868,6 +900,9 @@ static int IsUpper(int enc, int c) static int IsXDigit(int enc, int c) { + if (enc == ASCII) + return isxdigit(c); + if (c >= 0x30 && c <= 0x39) return 1; if (c >= 0x41 && c <= 0x46) return 1; if (c >= 0x61 && c <= 0x66) return 1; @@ -876,6 +911,10 @@ static int IsXDigit(int enc, int c) static int IsWord(int enc, int c) { + if (enc == ASCII) { + return (isalpha(c) || isdigit(c) || c == 0x5f); + } + if (c >= 0x30 && c <= 0x39) return 1; if (c >= 0x41 && c <= 0x5a) return 1; if (c == 0x5f) return 1; @@ -1052,13 +1091,13 @@ static int IsWord(int enc, int c) return 0; } -static int IsAscii(int enc, int c) +static int IsAscii(int enc ARG_UNUSED, int c) { if (c >= 0x00 && c <= 0x7f) return 1; return 0; } -static int IsNewline(int enc, int c) +static int IsNewline(int enc ARG_UNUSED, int c) { if (c == 0x0a) return 1; return 0; @@ -1072,25 +1111,25 @@ static int exec(FILE* fp, ENC_INFO* einfo) enc = einfo->num; - fprintf(fp, "static unsigned short Enc%s_CtypeTable[256] = {\n", + fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n", einfo->name); for (c = 0; c < 256; c++) { val = 0; - if (IsNewline(enc, c)) val |= ONIGENC_CTYPE_NEWLINE; - if (IsAlpha (enc, c)) val |= ONIGENC_CTYPE_ALPHA; - if (IsBlank (enc, c)) val |= ONIGENC_CTYPE_BLANK; - if (IsCntrl (enc, c)) val |= ONIGENC_CTYPE_CNTRL; - if (IsDigit (enc, c)) val |= ONIGENC_CTYPE_DIGIT; - if (IsGraph (enc, c)) val |= ONIGENC_CTYPE_GRAPH; - if (IsLower (enc, c)) val |= ONIGENC_CTYPE_LOWER; - if (IsPrint (enc, c)) val |= ONIGENC_CTYPE_PRINT; - if (IsPunct (enc, c)) val |= ONIGENC_CTYPE_PUNCT; - if (IsSpace (enc, c)) val |= ONIGENC_CTYPE_SPACE; - if (IsUpper (enc, c)) val |= ONIGENC_CTYPE_UPPER; - if (IsXDigit(enc, c)) val |= ONIGENC_CTYPE_XDIGIT; - if (IsWord (enc, c)) val |= ONIGENC_CTYPE_WORD; - if (IsAscii (enc, c)) val |= ONIGENC_CTYPE_ASCII; + if (IsNewline(enc, c)) val |= BIT_CTYPE_NEWLINE; + if (IsAlpha (enc, c)) val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM); + if (IsBlank (enc, c)) val |= BIT_CTYPE_BLANK; + if (IsCntrl (enc, c)) val |= BIT_CTYPE_CNTRL; + if (IsDigit (enc, c)) val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM); + if (IsGraph (enc, c)) val |= BIT_CTYPE_GRAPH; + if (IsLower (enc, c)) val |= BIT_CTYPE_LOWER; + if (IsPrint (enc, c)) val |= BIT_CTYPE_PRINT; + if (IsPunct (enc, c)) val |= BIT_CTYPE_PUNCT; + if (IsSpace (enc, c)) val |= BIT_CTYPE_SPACE; + if (IsUpper (enc, c)) val |= BIT_CTYPE_UPPER; + if (IsXDigit(enc, c)) val |= BIT_CTYPE_XDIGIT; + if (IsWord (enc, c)) val |= BIT_CTYPE_WORD; + if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII; if (c % NCOL == 0) fputs(" ", fp); fprintf(fp, "0x%04x", val); @@ -1104,12 +1143,20 @@ static int exec(FILE* fp, ENC_INFO* einfo) return 0; } -extern int main(int argc, char* argv[]) +extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED) { int i; FILE* fp = stdout; - for (i = 0; i < sizeof(Info)/sizeof(ENC_INFO); i++) { + setlocale(LC_ALL, "C"); + /* setlocale(LC_ALL, "POSIX"); */ + /* setlocale(LC_ALL, "en_GB.iso88591"); */ + /* setlocale(LC_ALL, "de_BE.iso88591"); */ + /* setlocale(LC_ALL, "fr_FR.iso88591"); */ + + for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) { exec(fp, &Info[i]); } + + return 0; } diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c index f7d7d52265..7a54c9fb3a 100644 --- a/ext/mbstring/oniguruma/enc/sjis.c +++ b/ext/mbstring/oniguruma/enc/sjis.c @@ -2,7 +2,7 @@ sjis.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,7 @@ * SUCH DAMAGE. */ -#include "regenc.h" +#include "regint.h" static const int EncLen_SJIS[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -71,13 +71,13 @@ static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { #define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] static int -sjis_mbc_enc_len(const UChar* p) +mbc_enc_len(const UChar* p) { return EncLen_SJIS[*p]; } static int -sjis_code_to_mbclen(OnigCodePoint code) +code_to_mbclen(OnigCodePoint code) { if (code < 256) { if (EncLen_SJIS[(int )code] == 1) @@ -89,16 +89,16 @@ sjis_code_to_mbclen(OnigCodePoint code) return 2; } else - return 0; + return ONIGERR_INVALID_CODE_POINT_VALUE; } static OnigCodePoint -sjis_mbc_to_code(const UChar* p, const UChar* end) +mbc_to_code(const UChar* p, const UChar* end) { int c, i, len; OnigCodePoint n; - len = enc_len(ONIG_ENCODING_SJIS, p); + len = enclen(ONIG_ENCODING_SJIS, p); c = *p++; n = c; if (len == 1) return n; @@ -112,7 +112,7 @@ sjis_mbc_to_code(const UChar* p, const UChar* end) } static int -sjis_code_to_mbc(OnigCodePoint code, UChar *buf) +code_to_mbc(OnigCodePoint code, UChar *buf) { UChar *p = buf; @@ -120,67 +120,63 @@ sjis_code_to_mbc(OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 0 - if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf)) - return REGERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf)) + return REGERR_INVALID_CODE_POINT_VALUE; #endif return p - buf; } static int -sjis_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) { const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); (*pp)++; return 1; } else { - int len = enc_len(ONIG_ENCODING_SJIS, p); + int i; + int len = enclen(ONIG_ENCODING_SJIS, p); - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } + for (i = 0; i < len; i++) { + *lower++ = *p++; } (*pp) += len; return len; /* return byte length of converted char to lower */ } } +#if 0 static int -sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) { return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end); } +#endif +#if 0 static int -sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 128) return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); else { - if ((ctype & (ONIGENC_CTYPE_WORD | - ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { - return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE); + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (code_to_mbclen(code) > 1 ? TRUE : FALSE); } } return FALSE; } +#endif static UChar* -sjis_left_adjust_char_head(const UChar* start, const UChar* s) +left_adjust_char_head(const UChar* start, const UChar* s) { const UChar *p; int len; @@ -196,43 +192,127 @@ sjis_left_adjust_char_head(const UChar* start, const UChar* s) } } } - len = enc_len(ONIG_ENCODING_SJIS, p); + len = enclen(ONIG_ENCODING_SJIS, p); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); } static int -sjis_is_allowed_reverse_match(const UChar* s, const UChar* end) +is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) { const UChar c = *s; return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE); } + +static int PropertyInited = 0; +static const OnigCodePoint** PropertyList; +static int PropertyListNum; +static int PropertyListSize; +static hash_table_type* PropertyNameTable; + +static const OnigCodePoint CR_Hiragana[] = { + 1, + 0x829f, 0x82f1 +}; /* CR_Hiragana */ + +static const OnigCodePoint CR_Katakana[] = { + 4, + 0x00a6, 0x00af, + 0x00b1, 0x00dd, + 0x8340, 0x837e, + 0x8380, 0x8396, +}; /* CR_Katakana */ + +static int +init_property_list(void) +{ + int r; + + PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); + PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); + PropertyInited = 1; + + end: + return r; +} + +static int +property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + hash_data_type ctype; + + PROPERTY_LIST_INIT_CHECK; + + if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { + return onigenc_minimum_property_name_to_ctype(enc, p, end); + } + + return (int )ctype; +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (code_to_mbclen(code) > 1 ? TRUE : FALSE); + } + } + } + else { + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (unsigned int )PropertyListNum) + return ONIGERR_TYPE_BUG; + + return onig_is_in_code_range((UChar* )PropertyList[ctype], code); + } + + return FALSE; +} + +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[]) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + return ONIG_NO_SUPPORT_CONFIG; + } + else { + *sb_out = 0x80; + + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (OnigCtype )PropertyListNum) + return ONIGERR_TYPE_BUG; + + *ranges = PropertyList[ctype]; + return 0; + } +} + OnigEncodingType OnigEncodingSJIS = { - sjis_mbc_enc_len, + mbc_enc_len, "Shift_JIS", /* name */ 2, /* max byte length */ 1, /* min byte length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, onigenc_is_mbc_newline_0x0a, - sjis_mbc_to_code, - sjis_code_to_mbclen, - sjis_code_to_mbc, - sjis_mbc_to_normalize, - sjis_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - sjis_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - sjis_left_adjust_char_head, - sjis_is_allowed_reverse_match + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + property_name_to_ctype, + is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + is_allowed_reverse_match }; diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c index a8cf539014..af7a86e088 100644 --- a/ext/mbstring/oniguruma/enc/unicode.c +++ b/ext/mbstring/oniguruma/enc/unicode.c @@ -2,7 +2,7 @@ unicode.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,51 +27,59 @@ * SUCH DAMAGE. */ -#include "regenc.h" +#include "regint.h" +#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ + ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) +#if 0 +#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \ + ((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0) +#endif -const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, +static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 }; -static const OnigCodePoint CRAlnum[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 414, -#else - 9, -#endif - 0x0030, 0x0039, +/* 'NEWLINE' */ +static const OnigCodePoint CR_NEWLINE[] = { + 1, + 0x000a, 0x000a +}; /* CR_NEWLINE */ + +/* 'Alpha': [[:Alpha:]] */ +static const OnigCodePoint CR_Alpha[] = { + 418, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -79,15 +87,12 @@ static const OnigCodePoint CRAlnum[] = { 0x00ba, 0x00ba, 0x00c0, 0x00d6, 0x00d8, 0x00f6, - 0x00f8, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , + 0x00f8, 0x0241, 0x0250, 0x02c1, 0x02c6, 0x02d1, 0x02e0, 0x02e4, 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, + 0x0300, 0x036f, 0x037a, 0x037a, 0x0386, 0x0386, 0x0388, 0x038a, @@ -95,41 +100,39 @@ static const OnigCodePoint CRAlnum[] = { 0x038e, 0x03a1, 0x03a3, 0x03ce, 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, + 0x03f7, 0x0481, 0x0483, 0x0486, 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, + 0x04d0, 0x04f9, 0x0500, 0x050f, 0x0531, 0x0556, 0x0559, 0x0559, 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, + 0x0591, 0x05b9, 0x05bb, 0x05bd, 0x05bf, 0x05bf, 0x05c1, 0x05c2, - 0x05c4, 0x05c4, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, 0x05d0, 0x05ea, 0x05f0, 0x05f2, 0x0610, 0x0615, 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x0669, + 0x0640, 0x065e, 0x066e, 0x06d3, 0x06d5, 0x06dc, 0x06de, 0x06e8, - 0x06ea, 0x06fc, + 0x06ea, 0x06ef, + 0x06fa, 0x06fc, 0x06ff, 0x06ff, 0x0710, 0x074a, - 0x074d, 0x074f, + 0x074d, 0x076d, 0x0780, 0x07b1, 0x0901, 0x0939, 0x093c, 0x094d, 0x0950, 0x0954, 0x0958, 0x0963, - 0x0966, 0x096f, + 0x097d, 0x097d, 0x0981, 0x0983, 0x0985, 0x098c, 0x098f, 0x0990, @@ -139,11 +142,11 @@ static const OnigCodePoint CRAlnum[] = { 0x09b6, 0x09b9, 0x09bc, 0x09c4, 0x09c7, 0x09c8, - 0x09cb, 0x09cd, + 0x09cb, 0x09ce, 0x09d7, 0x09d7, 0x09dc, 0x09dd, 0x09df, 0x09e3, - 0x09e6, 0x09f1, + 0x09f0, 0x09f1, 0x0a01, 0x0a03, 0x0a05, 0x0a0a, 0x0a0f, 0x0a10, @@ -158,7 +161,7 @@ static const OnigCodePoint CRAlnum[] = { 0x0a4b, 0x0a4d, 0x0a59, 0x0a5c, 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, + 0x0a70, 0x0a74, 0x0a81, 0x0a83, 0x0a85, 0x0a8d, 0x0a8f, 0x0a91, @@ -171,7 +174,6 @@ static const OnigCodePoint CRAlnum[] = { 0x0acb, 0x0acd, 0x0ad0, 0x0ad0, 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, 0x0b01, 0x0b03, 0x0b05, 0x0b0c, 0x0b0f, 0x0b10, @@ -185,7 +187,6 @@ static const OnigCodePoint CRAlnum[] = { 0x0b56, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, 0x0b71, 0x0b71, 0x0b82, 0x0b83, 0x0b85, 0x0b8a, @@ -196,13 +197,11 @@ static const OnigCodePoint CRAlnum[] = { 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, + 0x0bae, 0x0bb9, 0x0bbe, 0x0bc2, 0x0bc6, 0x0bc8, 0x0bca, 0x0bcd, 0x0bd7, 0x0bd7, - 0x0be7, 0x0bef, 0x0c01, 0x0c03, 0x0c05, 0x0c0c, 0x0c0e, 0x0c10, @@ -214,7 +213,6 @@ static const OnigCodePoint CRAlnum[] = { 0x0c4a, 0x0c4d, 0x0c55, 0x0c56, 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, 0x0c82, 0x0c83, 0x0c85, 0x0c8c, 0x0c8e, 0x0c90, @@ -227,7 +225,6 @@ static const OnigCodePoint CRAlnum[] = { 0x0cd5, 0x0cd6, 0x0cde, 0x0cde, 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, 0x0d02, 0x0d03, 0x0d05, 0x0d0c, 0x0d0e, 0x0d10, @@ -238,7 +235,6 @@ static const OnigCodePoint CRAlnum[] = { 0x0d4a, 0x0d4d, 0x0d57, 0x0d57, 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, 0x0d82, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, @@ -252,7 +248,6 @@ static const OnigCodePoint CRAlnum[] = { 0x0df2, 0x0df3, 0x0e01, 0x0e3a, 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, 0x0e81, 0x0e82, 0x0e84, 0x0e84, 0x0e87, 0x0e88, @@ -269,11 +264,9 @@ static const OnigCodePoint CRAlnum[] = { 0x0ec0, 0x0ec4, 0x0ec6, 0x0ec6, 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, 0x0edc, 0x0edd, 0x0f00, 0x0f00, 0x0f18, 0x0f19, - 0x0f20, 0x0f29, 0x0f35, 0x0f35, 0x0f37, 0x0f37, 0x0f39, 0x0f39, @@ -289,39 +282,31 @@ static const OnigCodePoint CRAlnum[] = { 0x1029, 0x102a, 0x102c, 0x1032, 0x1036, 0x1039, - 0x1040, 0x1049, 0x1050, 0x1059, 0x10a0, 0x10c5, - 0x10d0, 0x10f8, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, 0x1100, 0x1159, 0x115f, 0x11a2, 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, + 0x1200, 0x1248, 0x124a, 0x124d, 0x1250, 0x1256, 0x1258, 0x1258, 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, + 0x1260, 0x1288, 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, + 0x1290, 0x12b0, 0x12b2, 0x12b5, 0x12b8, 0x12be, 0x12c0, 0x12c0, 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1369, 0x1371, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, 0x13a0, 0x13f4, 0x1401, 0x166c, 0x166f, 0x1676, @@ -338,17 +323,18 @@ static const OnigCodePoint CRAlnum[] = { 0x17b6, 0x17d3, 0x17d7, 0x17d7, 0x17dc, 0x17dd, - 0x17e0, 0x17e9, 0x180b, 0x180d, - 0x1810, 0x1819, 0x1820, 0x1877, 0x1880, 0x18a9, 0x1900, 0x191c, 0x1920, 0x192b, 0x1930, 0x193b, - 0x1946, 0x196d, + 0x1950, 0x196d, 0x1970, 0x1974, - 0x1d00, 0x1d6b, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, 0x1e00, 0x1e9b, 0x1ea0, 0x1ef9, 0x1f00, 0x1f15, @@ -372,7 +358,8 @@ static const OnigCodePoint CRAlnum[] = { 0x1ff6, 0x1ffc, 0x2071, 0x2071, 0x207f, 0x207f, - 0x20d0, 0x20ea, + 0x2090, 0x2094, + 0x20d0, 0x20eb, 0x2102, 0x2102, 0x2107, 0x2107, 0x210a, 0x2113, @@ -384,8 +371,23 @@ static const OnigCodePoint CRAlnum[] = { 0x212a, 0x212d, 0x212f, 0x2131, 0x2133, 0x2139, - 0x213d, 0x213f, + 0x213c, 0x213f, 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, 0x3005, 0x3006, 0x302a, 0x302f, 0x3031, 0x3035, @@ -400,11 +402,13 @@ static const OnigCodePoint CRAlnum[] = { 0x31a0, 0x31b7, 0x31f0, 0x31ff, 0x3400, 0x4db5, - 0x4e00, 0x9fa5, + 0x4e00, 0x9fbb, 0xa000, 0xa48c, + 0xa800, 0xa827, 0xac00, 0xd7a3, 0xf900, 0xfa2d, 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, 0xfb00, 0xfb06, 0xfb13, 0xfb17, 0xfb1d, 0xfb28, @@ -422,7 +426,6 @@ static const OnigCodePoint CRAlnum[] = { 0xfe20, 0xfe23, 0xfe70, 0xfe74, 0xfe76, 0xfefc, - 0xff10, 0xff19, 0xff21, 0xff3a, 0xff41, 0xff5a, 0xff66, 0xffbe, @@ -440,19 +443,28 @@ static const OnigCodePoint CRAlnum[] = { 0x10300, 0x1031e, 0x10330, 0x10349, 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, 0x10400, 0x1049d, - 0x104a0, 0x104a9, 0x10800, 0x10805, 0x10808, 0x10808, 0x1080a, 0x10835, 0x10837, 0x10838, 0x1083c, 0x1083c, 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, 0x1d165, 0x1d169, 0x1d16d, 0x1d172, 0x1d17b, 0x1d182, 0x1d185, 0x1d18b, 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, 0x1d400, 0x1d454, 0x1d456, 0x1d49c, 0x1d49e, 0x1d49f, @@ -471,7 +483,7 @@ static const OnigCodePoint CRAlnum[] = { 0x1d540, 0x1d544, 0x1d546, 0x1d546, 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, + 0x1d552, 0x1d6a5, 0x1d6a8, 0x1d6c0, 0x1d6c2, 0x1d6da, 0x1d6dc, 0x1d6fa, @@ -483,76 +495,117 @@ static const OnigCodePoint CRAlnum[] = { 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, 0x20000, 0x2a6d6, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRAlnum */ +}; /* CR_Alpha */ -static const OnigCodePoint CRAlpha[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 396, -#else - 8, -#endif - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, +/* 'Blank': [[:Blank:]] */ +static const OnigCodePoint CR_Blank[] = { + 9, + 0x0009, 0x0009, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Blank */ + +/* 'Cntrl': [[:Cntrl:]] */ +static const OnigCodePoint CR_Cntrl[] = { + 19, + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xd800, 0xf8ff, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Cntrl */ + +/* 'Digit': [[:Digit:]] */ +static const OnigCodePoint CR_Digit[] = { + 23, + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +}; /* CR_Digit */ + +/* 'Graph': [[:Graph:]] */ +static const OnigCodePoint CR_Graph[] = { + 424, + 0x0021, 0x007e, + 0x00a1, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, + 0x037e, 0x037e, + 0x0384, 0x038a, 0x038c, 0x038c, 0x038e, 0x03a1, 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, + 0x03d0, 0x0486, 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, + 0x04d0, 0x04f9, 0x0500, 0x050f, 0x0531, 0x0556, - 0x0559, 0x0559, + 0x0559, 0x055f, 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, 0x0621, 0x063a, - 0x0640, 0x0658, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, 0x0780, 0x07b1, 0x0901, 0x0939, 0x093c, 0x094d, 0x0950, 0x0954, - 0x0958, 0x0963, + 0x0958, 0x0970, + 0x097d, 0x097d, 0x0981, 0x0983, 0x0985, 0x098c, 0x098f, 0x0990, @@ -562,11 +615,11 @@ static const OnigCodePoint CRAlpha[] = { 0x09b6, 0x09b9, 0x09bc, 0x09c4, 0x09c7, 0x09c8, - 0x09cb, 0x09cd, + 0x09cb, 0x09ce, 0x09d7, 0x09d7, 0x09dc, 0x09dd, 0x09df, 0x09e3, - 0x09f0, 0x09f1, + 0x09e6, 0x09fa, 0x0a01, 0x0a03, 0x0a05, 0x0a0a, 0x0a0f, 0x0a10, @@ -581,7 +634,7 @@ static const OnigCodePoint CRAlpha[] = { 0x0a4b, 0x0a4d, 0x0a59, 0x0a5c, 0x0a5e, 0x0a5e, - 0x0a70, 0x0a74, + 0x0a66, 0x0a74, 0x0a81, 0x0a83, 0x0a85, 0x0a8d, 0x0a8f, 0x0a91, @@ -594,6 +647,8 @@ static const OnigCodePoint CRAlpha[] = { 0x0acb, 0x0acd, 0x0ad0, 0x0ad0, 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, 0x0b01, 0x0b03, 0x0b05, 0x0b0c, 0x0b0f, 0x0b10, @@ -607,7 +662,7 @@ static const OnigCodePoint CRAlpha[] = { 0x0b56, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, + 0x0b66, 0x0b71, 0x0b82, 0x0b83, 0x0b85, 0x0b8a, 0x0b8e, 0x0b90, @@ -617,12 +672,12 @@ static const OnigCodePoint CRAlpha[] = { 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, + 0x0bae, 0x0bb9, 0x0bbe, 0x0bc2, 0x0bc6, 0x0bc8, 0x0bca, 0x0bcd, 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, 0x0c01, 0x0c03, 0x0c05, 0x0c0c, 0x0c0e, 0x0c10, @@ -634,6 +689,7 @@ static const OnigCodePoint CRAlpha[] = { 0x0c4a, 0x0c4d, 0x0c55, 0x0c56, 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, 0x0c82, 0x0c83, 0x0c85, 0x0c8c, 0x0c8e, 0x0c90, @@ -646,6 +702,7 @@ static const OnigCodePoint CRAlpha[] = { 0x0cd5, 0x0cd6, 0x0cde, 0x0cde, 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, 0x0d02, 0x0d03, 0x0d05, 0x0d0c, 0x0d0e, 0x0d10, @@ -656,6 +713,7 @@ static const OnigCodePoint CRAlpha[] = { 0x0d4a, 0x0d4d, 0x0d57, 0x0d57, 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, 0x0d82, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, @@ -666,9 +724,9 @@ static const OnigCodePoint CRAlpha[] = { 0x0dcf, 0x0dd4, 0x0dd6, 0x0dd6, 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, + 0x0df2, 0x0df4, 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, + 0x0e3f, 0x0e5b, 0x0e81, 0x0e82, 0x0e84, 0x0e84, 0x0e87, 0x0e88, @@ -685,80 +743,74 @@ static const OnigCodePoint CRAlpha[] = { 0x0ec0, 0x0ec4, 0x0ec6, 0x0ec6, 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, + 0x0f00, 0x0f47, 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, + 0x0f71, 0x0f8b, 0x0f90, 0x0f97, 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, 0x1000, 0x1021, 0x1023, 0x1027, 0x1029, 0x102a, 0x102c, 0x1032, 0x1036, 0x1039, - 0x1050, 0x1059, + 0x1040, 0x1059, 0x10a0, 0x10c5, - 0x10d0, 0x10f8, + 0x10d0, 0x10fc, 0x1100, 0x1159, 0x115f, 0x11a2, 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, + 0x1200, 0x1248, 0x124a, 0x124d, 0x1250, 0x1256, 0x1258, 0x1258, 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, + 0x1260, 0x1288, 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, + 0x1290, 0x12b0, 0x12b2, 0x12b5, 0x12b8, 0x12be, 0x12c0, 0x12c0, 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, + 0x1401, 0x1676, + 0x1681, 0x169c, + 0x16a0, 0x16f0, 0x1700, 0x170c, 0x170e, 0x1714, - 0x1720, 0x1734, + 0x1720, 0x1736, 0x1740, 0x1753, 0x1760, 0x176c, 0x176e, 0x1770, 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x180b, 0x180d, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, 0x1820, 0x1877, 0x1880, 0x18a9, 0x1900, 0x191c, 0x1920, 0x192b, 0x1930, 0x193b, - 0x1950, 0x196d, + 0x1940, 0x1940, + 0x1944, 0x196d, 0x1970, 0x1974, - 0x1d00, 0x1d6b, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, 0x1e00, 0x1e9b, 0x1ea0, 0x1ef9, 0x1f00, 0x1f15, @@ -771,74 +823,111 @@ static const OnigCodePoint CRAlpha[] = { 0x1f5d, 0x1f5d, 0x1f5f, 0x1f7d, 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, + 0x1fdd, 0x1fef, 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, + 0x1ff6, 0x1ffe, + 0x200b, 0x2027, + 0x202a, 0x202e, + 0x2030, 0x205e, + 0x2060, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x303f, 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, + 0x3099, 0x30ff, 0x3105, 0x312c, 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, 0xfb00, 0xfb06, 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, + 0xfb1d, 0xfb36, 0xfb38, 0xfb3c, 0xfb3e, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, + 0xfbd3, 0xfd3f, 0xfd50, 0xfd8f, 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, 0xfe70, 0xfe74, 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, 0xffc2, 0xffc7, 0xffca, 0xffcf, 0xffd2, 0xffd7, 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, 0x10000, 0x1000b, 0x1000d, 0x10026, 0x10028, 0x1003a, @@ -846,21 +935,36 @@ static const OnigCodePoint CRAlpha[] = { 0x1003f, 0x1004d, 0x10050, 0x1005d, 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, 0x10300, 0x1031e, - 0x10330, 0x10349, + 0x10320, 0x10323, + 0x10330, 0x1034a, 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, 0x10400, 0x1049d, + 0x104a0, 0x104a9, 0x10800, 0x10805, 0x10808, 0x10808, 0x1080a, 0x10835, 0x10837, 0x10838, 0x1083c, 0x1083c, 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, 0x1d400, 0x1d454, 0x1d456, 0x1d49c, 0x1d49e, 0x1d49f, @@ -879,538 +983,27 @@ static const OnigCodePoint CRAlpha[] = { 0x1d540, 0x1d544, 0x1d546, 0x1d546, 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, 0x20000, 0x2a6d6, 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRAlpha */ - -static const OnigCodePoint CRBlank[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 9, -#else - 3, -#endif - 0x0009, 0x0009, - 0x0020, 0x0020, - 0x00a0, 0x00a0 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRBlank */ - -static const OnigCodePoint CRCntrl[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 19, -#else - 3, -#endif - 0x0000, 0x001f, - 0x007f, 0x009f, - 0x00ad, 0x00ad -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0600, 0x0603, - 0x06dd, 0x06dd, - 0x070f, 0x070f, - 0x17b4, 0x17b5, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x2063, - 0x206a, 0x206f, - 0xd800, 0xf8ff, - 0xfeff, 0xfeff, - 0xfff9, 0xfffb, - 0x1d173, 0x1d17a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, 0xf0000, 0xffffd, 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRCntrl */ - -static const OnigCodePoint CRDigit[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 23, -#else - 1, -#endif - 0x0030, 0x0039 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be7, 0x0bef, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f29, - 0x1040, 0x1049, - 0x1369, 0x1371, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x1d7ce, 0x1d7ff -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRDigit */ +}; /* CR_Graph */ -static const OnigCodePoint CRGraph[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 405, -#else - 2, -#endif - 0x0021, 0x007e, - 0x00a1, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x0357, - 0x035d, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03fb, - 0x0400, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060c, 0x0615, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x10fb, 0x10fb, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1361, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1681, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x19e0, 0x19ff, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x200b, 0x2027, - 0x202a, 0x202e, - 0x2030, 0x2054, - 0x2057, 0x2057, - 0x2060, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x20a0, 0x20b1, - 0x20d0, 0x20ea, - 0x2100, 0x213b, - 0x213d, 0x214b, - 0x2153, 0x2183, - 0x2190, 0x23d0, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x2617, - 0x2619, 0x267d, - 0x2680, 0x2691, - 0x26a0, 0x26a1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27d0, 0x27eb, - 0x27f0, 0x2b0d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3001, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x327d, - 0x327f, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fa5, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1013f, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x1039f, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRGraph */ - -static const OnigCodePoint CRLower[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 424, -#else - 6, -#endif +/* 'Lower': [[:Lower:]] */ +static const OnigCodePoint CR_Lower[] = { + 480, 0x0061, 0x007a, 0x00aa, 0x00aa, 0x00b5, 0x00b5, 0x00ba, 0x00ba, 0x00df, 0x00f6, - 0x00f8, 0x00ff -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , + 0x00f8, 0x00ff, 0x0101, 0x0101, 0x0103, 0x0103, 0x0105, 0x0105, @@ -1543,7 +1136,9 @@ static const OnigCodePoint CRLower[] = { 0x022d, 0x022d, 0x022f, 0x022f, 0x0231, 0x0231, - 0x0233, 0x0236, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, 0x0250, 0x02af, 0x0390, 0x0390, 0x03ac, 0x03ce, @@ -1563,7 +1158,7 @@ static const OnigCodePoint CRLower[] = { 0x03ef, 0x03f3, 0x03f5, 0x03f5, 0x03f8, 0x03f8, - 0x03fb, 0x03fb, + 0x03fb, 0x03fc, 0x0430, 0x045f, 0x0461, 0x0461, 0x0463, 0x0463, @@ -1635,6 +1230,7 @@ static const OnigCodePoint CRLower[] = { 0x04f1, 0x04f1, 0x04f3, 0x04f3, 0x04f5, 0x04f5, + 0x04f7, 0x04f7, 0x04f9, 0x04f9, 0x0501, 0x0501, 0x0503, 0x0503, @@ -1646,7 +1242,8 @@ static const OnigCodePoint CRLower[] = { 0x050f, 0x050f, 0x0561, 0x0587, 0x1d00, 0x1d2b, - 0x1d62, 0x1d6b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, 0x1e01, 0x1e01, 0x1e03, 0x1e03, 0x1e05, 0x1e05, @@ -1796,8 +1393,60 @@ static const OnigCodePoint CRLower[] = { 0x212f, 0x212f, 0x2134, 0x2134, 0x2139, 0x2139, - 0x213d, 0x213d, + 0x213c, 0x213d, 0x2146, 0x2149, + 0x2c30, 0x2c5e, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, 0xfb00, 0xfb06, 0xfb13, 0xfb17, 0xff41, 0xff5a, @@ -1818,7 +1467,7 @@ static const OnigCodePoint CRLower[] = { 0x1d5ee, 0x1d607, 0x1d622, 0x1d63b, 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a3, + 0x1d68a, 0x1d6a5, 0x1d6c2, 0x1d6da, 0x1d6dc, 0x1d6e1, 0x1d6fc, 0x1d714, @@ -1829,23 +1478,16 @@ static const OnigCodePoint CRLower[] = { 0x1d78a, 0x1d78f, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7c9 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRLower */ +}; /* CR_Lower */ -static const OnigCodePoint CRPrint[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 405, -#else - 4, -#endif +/* 'Print': [[:Print:]] */ +static const OnigCodePoint CR_Print[] = { + 423, 0x0009, 0x000d, 0x0020, 0x007e, 0x0085, 0x0085, - 0x00a0, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x0357, - 0x035d, 0x036f, + 0x00a0, 0x0241, + 0x0250, 0x036f, 0x0374, 0x0375, 0x037a, 0x037a, 0x037e, 0x037e, @@ -1853,35 +1495,33 @@ static const OnigCodePoint CRPrint[] = { 0x038c, 0x038c, 0x038e, 0x03a1, 0x03a3, 0x03ce, - 0x03d0, 0x03fb, - 0x0400, 0x0486, + 0x03d0, 0x0486, 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, + 0x04d0, 0x04f9, 0x0500, 0x050f, 0x0531, 0x0556, 0x0559, 0x055f, 0x0561, 0x0587, 0x0589, 0x058a, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05c4, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, 0x05d0, 0x05ea, 0x05f0, 0x05f4, 0x0600, 0x0603, - 0x060c, 0x0615, + 0x060b, 0x0615, 0x061b, 0x061b, - 0x061f, 0x061f, + 0x061e, 0x061f, 0x0621, 0x063a, - 0x0640, 0x0658, + 0x0640, 0x065e, 0x0660, 0x070d, 0x070f, 0x074a, - 0x074d, 0x074f, + 0x074d, 0x076d, 0x0780, 0x07b1, 0x0901, 0x0939, 0x093c, 0x094d, 0x0950, 0x0954, 0x0958, 0x0970, + 0x097d, 0x097d, 0x0981, 0x0983, 0x0985, 0x098c, 0x098f, 0x0990, @@ -1891,7 +1531,7 @@ static const OnigCodePoint CRPrint[] = { 0x09b6, 0x09b9, 0x09bc, 0x09c4, 0x09c7, 0x09c8, - 0x09cb, 0x09cd, + 0x09cb, 0x09ce, 0x09d7, 0x09d7, 0x09dc, 0x09dd, 0x09df, 0x09e3, @@ -1948,13 +1588,12 @@ static const OnigCodePoint CRPrint[] = { 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, + 0x0bae, 0x0bb9, 0x0bbe, 0x0bc2, 0x0bc6, 0x0bc8, 0x0bca, 0x0bcd, 0x0bd7, 0x0bd7, - 0x0be7, 0x0bfa, + 0x0be6, 0x0bfa, 0x0c01, 0x0c03, 0x0c05, 0x0c0c, 0x0c0e, 0x0c10, @@ -2028,7 +1667,7 @@ static const OnigCodePoint CRPrint[] = { 0x0f90, 0x0f97, 0x0f99, 0x0fbc, 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fcf, + 0x0fcf, 0x0fd1, 0x1000, 0x1021, 0x1023, 0x1027, 0x1029, 0x102a, @@ -2036,37 +1675,28 @@ static const OnigCodePoint CRPrint[] = { 0x1036, 0x1039, 0x1040, 0x1059, 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x10fb, 0x10fb, + 0x10d0, 0x10fc, 0x1100, 0x1159, 0x115f, 0x11a2, 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, + 0x1200, 0x1248, 0x124a, 0x124d, 0x1250, 0x1256, 0x1258, 0x1258, 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, + 0x1260, 0x1288, 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, + 0x1290, 0x12b0, 0x12b2, 0x12b5, 0x12b8, 0x12be, 0x12c0, 0x12c0, 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1361, 0x137c, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, 0x13a0, 0x13f4, 0x1401, 0x1676, 0x1680, 0x169c, @@ -2091,8 +1721,12 @@ static const OnigCodePoint CRPrint[] = { 0x1940, 0x1940, 0x1944, 0x196d, 0x1970, 0x1974, - 0x19e0, 0x19ff, - 0x1d00, 0x1d6b, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, 0x1e00, 0x1e9b, 0x1ea0, 0x1ef9, 0x1f00, 0x1f15, @@ -2111,23 +1745,19 @@ static const OnigCodePoint CRPrint[] = { 0x1fdd, 0x1fef, 0x1ff2, 0x1ff4, 0x1ff6, 0x1ffe, - 0x2000, 0x2054, - 0x2057, 0x2057, - 0x205f, 0x2063, + 0x2000, 0x2063, 0x206a, 0x2071, 0x2074, 0x208e, - 0x20a0, 0x20b1, - 0x20d0, 0x20ea, - 0x2100, 0x213b, - 0x213d, 0x214b, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, 0x2153, 0x2183, - 0x2190, 0x23d0, + 0x2190, 0x23db, 0x2400, 0x2426, 0x2440, 0x244a, - 0x2460, 0x2617, - 0x2619, 0x267d, - 0x2680, 0x2691, - 0x26a0, 0x26a1, + 0x2460, 0x269c, + 0x26a0, 0x26b1, 0x2701, 0x2704, 0x2706, 0x2709, 0x270c, 0x2727, @@ -2139,8 +1769,26 @@ static const OnigCodePoint CRPrint[] = { 0x2761, 0x2794, 0x2798, 0x27af, 0x27b1, 0x27be, + 0x27c0, 0x27c6, 0x27d0, 0x27eb, - 0x27f0, 0x2b0d, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -2151,17 +1799,20 @@ static const OnigCodePoint CRPrint[] = { 0x3105, 0x312c, 0x3131, 0x318e, 0x3190, 0x31b7, + 0x31c0, 0x31cf, 0x31f0, 0x321e, 0x3220, 0x3243, - 0x3250, 0x327d, - 0x327f, 0x32fe, + 0x3250, 0x32fe, 0x3300, 0x4db5, - 0x4dc0, 0x9fa5, + 0x4dc0, 0x9fbb, 0xa000, 0xa48c, 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, 0xac00, 0xd7a3, 0xe000, 0xfa2d, 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, 0xfb00, 0xfb06, 0xfb13, 0xfb17, 0xfb1d, 0xfb36, @@ -2174,7 +1825,7 @@ static const OnigCodePoint CRPrint[] = { 0xfd50, 0xfd8f, 0xfd92, 0xfdc7, 0xfdf0, 0xfdfd, - 0xfe00, 0xfe0f, + 0xfe00, 0xfe19, 0xfe20, 0xfe23, 0xfe30, 0xfe52, 0xfe54, 0xfe66, @@ -2199,12 +1850,13 @@ static const OnigCodePoint CRPrint[] = { 0x10080, 0x100fa, 0x10100, 0x10102, 0x10107, 0x10133, - 0x10137, 0x1013f, + 0x10137, 0x1018a, 0x10300, 0x1031e, 0x10320, 0x10323, 0x10330, 0x1034a, 0x10380, 0x1039d, - 0x1039f, 0x1039f, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, 0x10400, 0x1049d, 0x104a0, 0x104a9, 0x10800, 0x10805, @@ -2213,9 +1865,18 @@ static const OnigCodePoint CRPrint[] = { 0x10837, 0x10838, 0x1083c, 0x1083c, 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, 0x1d000, 0x1d0f5, 0x1d100, 0x1d126, 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, 0x1d300, 0x1d356, 0x1d400, 0x1d454, 0x1d456, 0x1d49c, @@ -2235,7 +1896,7 @@ static const OnigCodePoint CRPrint[] = { 0x1d540, 0x1d544, 0x1d546, 0x1d546, 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, + 0x1d552, 0x1d6a5, 0x1d6a8, 0x1d7c9, 0x1d7ce, 0x1d7ff, 0x20000, 0x2a6d6, @@ -2245,15 +1906,11 @@ static const OnigCodePoint CRPrint[] = { 0xe0100, 0xe01ef, 0xf0000, 0xffffd, 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRPrint */ +}; /* CR_Print */ -static const OnigCodePoint CRPunct[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 86, -#else - 14, -#endif +/* 'Punct': [[:Punct:]] */ +static const OnigCodePoint CR_Punct[] = { + 96, 0x0021, 0x0023, 0x0025, 0x002a, 0x002c, 0x002f, @@ -2267,9 +1924,7 @@ static const OnigCodePoint CRPunct[] = { 0x00ab, 0x00ab, 0x00b7, 0x00b7, 0x00bb, 0x00bb, - 0x00bf, 0x00bf -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , + 0x00bf, 0x00bf, 0x037e, 0x037e, 0x0387, 0x0387, 0x055a, 0x055f, @@ -2277,10 +1932,11 @@ static const OnigCodePoint CRPunct[] = { 0x05be, 0x05be, 0x05c0, 0x05c0, 0x05c3, 0x05c3, + 0x05c6, 0x05c6, 0x05f3, 0x05f4, 0x060c, 0x060d, 0x061b, 0x061b, - 0x061f, 0x061f, + 0x061e, 0x061f, 0x066a, 0x066d, 0x06d4, 0x06d4, 0x0700, 0x070d, @@ -2292,6 +1948,7 @@ static const OnigCodePoint CRPunct[] = { 0x0f04, 0x0f12, 0x0f3a, 0x0f3d, 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, 0x104a, 0x104f, 0x10fb, 0x10fb, 0x1361, 0x1368, @@ -2303,20 +1960,26 @@ static const OnigCodePoint CRPunct[] = { 0x17d8, 0x17da, 0x1800, 0x180a, 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, 0x2010, 0x2027, 0x2030, 0x2043, 0x2045, 0x2051, - 0x2053, 0x2054, - 0x2057, 0x2057, + 0x2053, 0x205e, 0x207d, 0x207e, 0x208d, 0x208e, 0x2329, 0x232a, 0x23b4, 0x23b6, 0x2768, 0x2775, + 0x27c5, 0x27c6, 0x27e6, 0x27eb, 0x2983, 0x2998, 0x29d8, 0x29db, 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, 0x3001, 0x3003, 0x3008, 0x3011, 0x3014, 0x301f, @@ -2325,6 +1988,7 @@ static const OnigCodePoint CRPunct[] = { 0x30a0, 0x30a0, 0x30fb, 0x30fb, 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, 0xfe30, 0xfe52, 0xfe54, 0xfe61, 0xfe63, 0xfe63, @@ -2341,22 +2005,17 @@ static const OnigCodePoint CRPunct[] = { 0xff5d, 0xff5d, 0xff5f, 0xff65, 0x10100, 0x10101, - 0x1039f, 0x1039f -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRPunct */ + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_Punct */ -static const OnigCodePoint CRSpace[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE +/* 'Space': [[:Space:]] */ +static const OnigCodePoint CR_Space[] = { 11, -#else - 4, -#endif 0x0009, 0x000d, 0x0020, 0x0020, 0x0085, 0x0085, - 0x00a0, 0x00a0 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , + 0x00a0, 0x00a0, 0x1680, 0x1680, 0x180e, 0x180e, 0x2000, 0x200a, @@ -2364,20 +2023,14 @@ static const OnigCodePoint CRSpace[] = { 0x202f, 0x202f, 0x205f, 0x205f, 0x3000, 0x3000 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRSpace */ +}; /* CR_Space */ -static const OnigCodePoint CRUpper[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 421, -#else - 3, -#endif +/* 'Upper': [[:Upper:]] */ +static const OnigCodePoint CR_Upper[] = { + 476, 0x0041, 0x005a, 0x00c0, 0x00d6, - 0x00d8, 0x00de -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , + 0x00d8, 0x00de, 0x0100, 0x0100, 0x0102, 0x0102, 0x0104, 0x0104, @@ -2511,6 +2164,9 @@ static const OnigCodePoint CRUpper[] = { 0x022e, 0x022e, 0x0230, 0x0230, 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, 0x0386, 0x0386, 0x0388, 0x038a, 0x038c, 0x038c, @@ -2533,7 +2189,7 @@ static const OnigCodePoint CRUpper[] = { 0x03f4, 0x03f4, 0x03f7, 0x03f7, 0x03f9, 0x03fa, - 0x0400, 0x042f, + 0x03fd, 0x042f, 0x0460, 0x0460, 0x0462, 0x0462, 0x0464, 0x0464, @@ -2604,6 +2260,7 @@ static const OnigCodePoint CRUpper[] = { 0x04f0, 0x04f0, 0x04f2, 0x04f2, 0x04f4, 0x04f4, + 0x04f6, 0x04f6, 0x04f8, 0x04f8, 0x0500, 0x0500, 0x0502, 0x0502, @@ -2764,6 +2421,57 @@ static const OnigCodePoint CRUpper[] = { 0x2133, 0x2133, 0x213e, 0x213f, 0x2145, 0x2145, + 0x2c00, 0x2c2e, + 0x2c80, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, 0xff21, 0xff3a, 0x10400, 0x10427, 0x1d400, 0x1d419, @@ -2796,35 +2504,19 @@ static const OnigCodePoint CRUpper[] = { 0x1d71c, 0x1d734, 0x1d756, 0x1d76e, 0x1d790, 0x1d7a8 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRUpper */ +}; /* CR_Upper */ -static const OnigCodePoint CRXDigit[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 3, -#else +/* 'XDigit': [[:XDigit:]] */ +static const OnigCodePoint CR_XDigit[] = { 3, -#endif 0x0030, 0x0039, 0x0041, 0x0046, 0x0061, 0x0066 -}; +}; /* CR_XDigit */ -static const OnigCodePoint CRASCII[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 1, -#else - 1, -#endif - 0x0000, 0x007f -}; - -static const OnigCodePoint CRWord[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 436, -#else - 12, -#endif +/* 'Word': [[:Word:]] */ +static const OnigCodePoint CR_Word[] = { + 464, 0x0030, 0x0039, 0x0041, 0x005a, 0x005f, 0x005f, @@ -2836,16 +2528,12 @@ static const OnigCodePoint CRWord[] = { 0x00bc, 0x00be, 0x00c0, 0x00d6, 0x00d8, 0x00f6, -#ifndef USE_UNICODE_FULL_RANGE_CTYPE - 0x00f8, 0x7fffffff -#else /* not USE_UNICODE_FULL_RANGE_CTYPE */ - 0x00f8, 0x0236, + 0x00f8, 0x0241, 0x0250, 0x02c1, 0x02c6, 0x02d1, 0x02e0, 0x02e4, 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, + 0x0300, 0x036f, 0x037a, 0x037a, 0x0386, 0x0386, 0x0388, 0x038a, @@ -2853,27 +2541,25 @@ static const OnigCodePoint CRWord[] = { 0x038e, 0x03a1, 0x03a3, 0x03ce, 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, + 0x03f7, 0x0481, 0x0483, 0x0486, 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, + 0x04d0, 0x04f9, 0x0500, 0x050f, 0x0531, 0x0556, 0x0559, 0x0559, 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, + 0x0591, 0x05b9, 0x05bb, 0x05bd, 0x05bf, 0x05bf, 0x05c1, 0x05c2, - 0x05c4, 0x05c4, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, 0x05d0, 0x05ea, 0x05f0, 0x05f2, 0x0610, 0x0615, 0x0621, 0x063a, - 0x0640, 0x0658, + 0x0640, 0x065e, 0x0660, 0x0669, 0x066e, 0x06d3, 0x06d5, 0x06dc, @@ -2881,13 +2567,14 @@ static const OnigCodePoint CRWord[] = { 0x06ea, 0x06fc, 0x06ff, 0x06ff, 0x0710, 0x074a, - 0x074d, 0x074f, + 0x074d, 0x076d, 0x0780, 0x07b1, 0x0901, 0x0939, 0x093c, 0x094d, 0x0950, 0x0954, 0x0958, 0x0963, 0x0966, 0x096f, + 0x097d, 0x097d, 0x0981, 0x0983, 0x0985, 0x098c, 0x098f, 0x0990, @@ -2897,7 +2584,7 @@ static const OnigCodePoint CRWord[] = { 0x09b6, 0x09b9, 0x09bc, 0x09c4, 0x09c7, 0x09c8, - 0x09cb, 0x09cd, + 0x09cb, 0x09ce, 0x09d7, 0x09d7, 0x09dc, 0x09dd, 0x09df, 0x09e3, @@ -2955,13 +2642,12 @@ static const OnigCodePoint CRWord[] = { 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, + 0x0bae, 0x0bb9, 0x0bbe, 0x0bc2, 0x0bc6, 0x0bc8, 0x0bca, 0x0bcd, 0x0bd7, 0x0bd7, - 0x0be7, 0x0bf2, + 0x0be6, 0x0bf2, 0x0c01, 0x0c03, 0x0c05, 0x0c0c, 0x0c0e, 0x0c10, @@ -3051,36 +2737,30 @@ static const OnigCodePoint CRWord[] = { 0x1040, 0x1049, 0x1050, 0x1059, 0x10a0, 0x10c5, - 0x10d0, 0x10f8, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, 0x1100, 0x1159, 0x115f, 0x11a2, 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, + 0x1200, 0x1248, 0x124a, 0x124d, 0x1250, 0x1256, 0x1258, 0x1258, 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, + 0x1260, 0x1288, 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, + 0x1290, 0x12b0, 0x12b2, 0x12b5, 0x12b8, 0x12be, 0x12c0, 0x12c0, 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, + 0x1318, 0x135a, + 0x135f, 0x135f, 0x1369, 0x137c, + 0x1380, 0x138f, 0x13a0, 0x13f4, 0x1401, 0x166c, 0x166f, 0x1676, @@ -3109,7 +2789,11 @@ static const OnigCodePoint CRWord[] = { 0x1930, 0x193b, 0x1946, 0x196d, 0x1970, 0x1974, - 0x1d00, 0x1d6b, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, 0x1e00, 0x1e9b, 0x1ea0, 0x1ef9, 0x1f00, 0x1f15, @@ -3136,7 +2820,8 @@ static const OnigCodePoint CRWord[] = { 0x2070, 0x2071, 0x2074, 0x2079, 0x207f, 0x2089, - 0x20d0, 0x20ea, + 0x2090, 0x2094, + 0x20d0, 0x20eb, 0x2102, 0x2102, 0x2107, 0x2107, 0x210a, 0x2113, @@ -3148,12 +2833,28 @@ static const OnigCodePoint CRWord[] = { 0x212a, 0x212d, 0x212f, 0x2131, 0x2133, 0x2139, - 0x213d, 0x213f, + 0x213c, 0x213f, 0x2145, 0x2149, 0x2153, 0x2183, 0x2460, 0x249b, 0x24ea, 0x24ff, 0x2776, 0x2793, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2cfd, 0x2cfd, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, 0x3005, 0x3007, 0x3021, 0x302f, 0x3031, 0x3035, @@ -3161,7 +2862,8 @@ static const OnigCodePoint CRWord[] = { 0x3041, 0x3096, 0x3099, 0x309a, 0x309d, 0x309f, - 0x30a1, 0x30ff, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, 0x3105, 0x312c, 0x3131, 0x318e, 0x3192, 0x3195, @@ -3172,11 +2874,13 @@ static const OnigCodePoint CRWord[] = { 0x3280, 0x3289, 0x32b1, 0x32bf, 0x3400, 0x4db5, - 0x4e00, 0x9fa5, + 0x4e00, 0x9fbb, 0xa000, 0xa48c, + 0xa800, 0xa827, 0xac00, 0xd7a3, 0xf900, 0xfa2d, 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, 0xfb00, 0xfb06, 0xfb13, 0xfb17, 0xfb1d, 0xfb28, @@ -3200,7 +2904,7 @@ static const OnigCodePoint CRWord[] = { 0xff21, 0xff3a, 0xff3f, 0xff3f, 0xff41, 0xff5a, - 0xff65, 0xffbe, + 0xff66, 0xffbe, 0xffc2, 0xffc7, 0xffca, 0xffcf, 0xffd2, 0xffd7, @@ -3213,10 +2917,15 @@ static const OnigCodePoint CRWord[] = { 0x10050, 0x1005d, 0x10080, 0x100fa, 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018a, 0x10300, 0x1031e, 0x10320, 0x10323, 0x10330, 0x1034a, 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, 0x10400, 0x1049d, 0x104a0, 0x104a9, 0x10800, 0x10805, @@ -3225,11 +2934,19 @@ static const OnigCodePoint CRWord[] = { 0x10837, 0x10838, 0x1083c, 0x1083c, 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, 0x1d165, 0x1d169, 0x1d16d, 0x1d172, 0x1d17b, 0x1d182, 0x1d185, 0x1d18b, 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, 0x1d400, 0x1d454, 0x1d456, 0x1d49c, 0x1d49e, 0x1d49f, @@ -3248,7 +2965,7 @@ static const OnigCodePoint CRWord[] = { 0x1d540, 0x1d544, 0x1d546, 0x1d546, 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, + 0x1d552, 0x1d6a5, 0x1d6a8, 0x1d6c0, 0x1d6c2, 0x1d6da, 0x1d6dc, 0x1d6fa, @@ -3264,140 +2981,8376 @@ static const OnigCodePoint CRWord[] = { 0x20000, 0x2a6d6, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRWord */ - - -extern int -onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) { - return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); - } - -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - return onig_is_in_code_range((UChar* )CRAlpha, code); - break; - case ONIGENC_CTYPE_BLANK: - return onig_is_in_code_range((UChar* )CRBlank, code); - break; - case ONIGENC_CTYPE_CNTRL: - return onig_is_in_code_range((UChar* )CRCntrl, code); - break; - case ONIGENC_CTYPE_DIGIT: - return onig_is_in_code_range((UChar* )CRDigit, code); - break; - case ONIGENC_CTYPE_GRAPH: - return onig_is_in_code_range((UChar* )CRGraph, code); - break; - case ONIGENC_CTYPE_LOWER: - return onig_is_in_code_range((UChar* )CRLower, code); - break; - case ONIGENC_CTYPE_PRINT: - return onig_is_in_code_range((UChar* )CRPrint, code); - break; - case ONIGENC_CTYPE_PUNCT: - return onig_is_in_code_range((UChar* )CRPunct, code); - break; - case ONIGENC_CTYPE_SPACE: - return onig_is_in_code_range((UChar* )CRSpace, code); - break; - case ONIGENC_CTYPE_UPPER: - return onig_is_in_code_range((UChar* )CRUpper, code); - break; - case ONIGENC_CTYPE_XDIGIT: - return FALSE; - break; - case ONIGENC_CTYPE_WORD: - return onig_is_in_code_range((UChar* )CRWord, code); - break; - case ONIGENC_CTYPE_ASCII: - return FALSE; - break; - case ONIGENC_CTYPE_ALNUM: - return onig_is_in_code_range((UChar* )CRAlnum, code); - break; - case ONIGENC_CTYPE_NEWLINE: - return FALSE; - break; - - default: - return ONIGENCERR_TYPE_BUG; - break; - } - -#else - - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { - return TRUE; - } - return FALSE; -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -} +}; /* CR_Word */ -extern int -onigenc_unicode_get_ctype_code_range(int ctype, - const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) -{ - static const OnigCodePoint EmptyRange[] = { 0 }; - -#define CR_SET(list) do { \ - *mbr = list; \ -} while (0) - - *sbr = EmptyRange; - - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - CR_SET(CRAlpha); - break; - case ONIGENC_CTYPE_BLANK: - CR_SET(CRBlank); - break; - case ONIGENC_CTYPE_CNTRL: - CR_SET(CRCntrl); - break; - case ONIGENC_CTYPE_DIGIT: - CR_SET(CRDigit); - break; - case ONIGENC_CTYPE_GRAPH: - CR_SET(CRGraph); - break; - case ONIGENC_CTYPE_LOWER: - CR_SET(CRLower); - break; - case ONIGENC_CTYPE_PRINT: - CR_SET(CRPrint); - break; - case ONIGENC_CTYPE_PUNCT: - CR_SET(CRPunct); - break; - case ONIGENC_CTYPE_SPACE: - CR_SET(CRSpace); - break; - case ONIGENC_CTYPE_UPPER: - CR_SET(CRUpper); - break; - case ONIGENC_CTYPE_XDIGIT: - CR_SET(CRXDigit); - break; - case ONIGENC_CTYPE_WORD: - CR_SET(CRWord); - break; - case ONIGENC_CTYPE_ASCII: - CR_SET(CRASCII); - break; - case ONIGENC_CTYPE_ALNUM: - CR_SET(CRAlnum); - break; - - default: - return ONIGENCERR_TYPE_BUG; - break; - } - - return 0; +/* 'Alnum': [[:Alnum:]] */ +static const OnigCodePoint CR_Alnum[] = { + 436, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Alnum */ + +/* 'ASCII': [[:ASCII:]] */ +static const OnigCodePoint CR_ASCII[] = { + 1, + 0x0000, 0x007f +}; /* CR_ASCII */ + +#ifdef USE_UNICODE_PROPERTIES + +/* 'Any': - */ +static const OnigCodePoint CR_Any[] = { + 1, + 0x0000, 0x10ffff +}; /* CR_Any */ + +/* 'Assigned': - */ +static const OnigCodePoint CR_Assigned[] = { + 420, + 0x0000, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xd800, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Assigned */ + +/* 'C': Major Category */ +static const OnigCodePoint CR_C[] = { + 422, + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad, + 0x0242, 0x024f, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037b, 0x037d, + 0x037f, 0x0383, + 0x038b, 0x038b, + 0x038d, 0x038d, + 0x03a2, 0x03a2, + 0x03cf, 0x03cf, + 0x0487, 0x0487, + 0x04cf, 0x04cf, + 0x04fa, 0x04ff, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058b, 0x0590, + 0x05ba, 0x05ba, + 0x05c8, 0x05cf, + 0x05eb, 0x05ef, + 0x05f5, 0x060a, + 0x0616, 0x061a, + 0x061c, 0x061d, + 0x0620, 0x0620, + 0x063b, 0x063f, + 0x065f, 0x065f, + 0x06dd, 0x06dd, + 0x070e, 0x070f, + 0x074b, 0x074c, + 0x076e, 0x077f, + 0x07b2, 0x0900, + 0x093a, 0x093b, + 0x094e, 0x094f, + 0x0955, 0x0957, + 0x0971, 0x097c, + 0x097e, 0x0980, + 0x0984, 0x0984, + 0x098d, 0x098e, + 0x0991, 0x0992, + 0x09a9, 0x09a9, + 0x09b1, 0x09b1, + 0x09b3, 0x09b5, + 0x09ba, 0x09bb, + 0x09c5, 0x09c6, + 0x09c9, 0x09ca, + 0x09cf, 0x09d6, + 0x09d8, 0x09db, + 0x09de, 0x09de, + 0x09e4, 0x09e5, + 0x09fb, 0x0a00, + 0x0a04, 0x0a04, + 0x0a0b, 0x0a0e, + 0x0a11, 0x0a12, + 0x0a29, 0x0a29, + 0x0a31, 0x0a31, + 0x0a34, 0x0a34, + 0x0a37, 0x0a37, + 0x0a3a, 0x0a3b, + 0x0a3d, 0x0a3d, + 0x0a43, 0x0a46, + 0x0a49, 0x0a4a, + 0x0a4e, 0x0a58, + 0x0a5d, 0x0a5d, + 0x0a5f, 0x0a65, + 0x0a75, 0x0a80, + 0x0a84, 0x0a84, + 0x0a8e, 0x0a8e, + 0x0a92, 0x0a92, + 0x0aa9, 0x0aa9, + 0x0ab1, 0x0ab1, + 0x0ab4, 0x0ab4, + 0x0aba, 0x0abb, + 0x0ac6, 0x0ac6, + 0x0aca, 0x0aca, + 0x0ace, 0x0acf, + 0x0ad1, 0x0adf, + 0x0ae4, 0x0ae5, + 0x0af0, 0x0af0, + 0x0af2, 0x0b00, + 0x0b04, 0x0b04, + 0x0b0d, 0x0b0e, + 0x0b11, 0x0b12, + 0x0b29, 0x0b29, + 0x0b31, 0x0b31, + 0x0b34, 0x0b34, + 0x0b3a, 0x0b3b, + 0x0b44, 0x0b46, + 0x0b49, 0x0b4a, + 0x0b4e, 0x0b55, + 0x0b58, 0x0b5b, + 0x0b5e, 0x0b5e, + 0x0b62, 0x0b65, + 0x0b72, 0x0b81, + 0x0b84, 0x0b84, + 0x0b8b, 0x0b8d, + 0x0b91, 0x0b91, + 0x0b96, 0x0b98, + 0x0b9b, 0x0b9b, + 0x0b9d, 0x0b9d, + 0x0ba0, 0x0ba2, + 0x0ba5, 0x0ba7, + 0x0bab, 0x0bad, + 0x0bba, 0x0bbd, + 0x0bc3, 0x0bc5, + 0x0bc9, 0x0bc9, + 0x0bce, 0x0bd6, + 0x0bd8, 0x0be5, + 0x0bfb, 0x0c00, + 0x0c04, 0x0c04, + 0x0c0d, 0x0c0d, + 0x0c11, 0x0c11, + 0x0c29, 0x0c29, + 0x0c34, 0x0c34, + 0x0c3a, 0x0c3d, + 0x0c45, 0x0c45, + 0x0c49, 0x0c49, + 0x0c4e, 0x0c54, + 0x0c57, 0x0c5f, + 0x0c62, 0x0c65, + 0x0c70, 0x0c81, + 0x0c84, 0x0c84, + 0x0c8d, 0x0c8d, + 0x0c91, 0x0c91, + 0x0ca9, 0x0ca9, + 0x0cb4, 0x0cb4, + 0x0cba, 0x0cbb, + 0x0cc5, 0x0cc5, + 0x0cc9, 0x0cc9, + 0x0cce, 0x0cd4, + 0x0cd7, 0x0cdd, + 0x0cdf, 0x0cdf, + 0x0ce2, 0x0ce5, + 0x0cf0, 0x0d01, + 0x0d04, 0x0d04, + 0x0d0d, 0x0d0d, + 0x0d11, 0x0d11, + 0x0d29, 0x0d29, + 0x0d3a, 0x0d3d, + 0x0d44, 0x0d45, + 0x0d49, 0x0d49, + 0x0d4e, 0x0d56, + 0x0d58, 0x0d5f, + 0x0d62, 0x0d65, + 0x0d70, 0x0d81, + 0x0d84, 0x0d84, + 0x0d97, 0x0d99, + 0x0db2, 0x0db2, + 0x0dbc, 0x0dbc, + 0x0dbe, 0x0dbf, + 0x0dc7, 0x0dc9, + 0x0dcb, 0x0dce, + 0x0dd5, 0x0dd5, + 0x0dd7, 0x0dd7, + 0x0de0, 0x0df1, + 0x0df5, 0x0e00, + 0x0e3b, 0x0e3e, + 0x0e5c, 0x0e80, + 0x0e83, 0x0e83, + 0x0e85, 0x0e86, + 0x0e89, 0x0e89, + 0x0e8b, 0x0e8c, + 0x0e8e, 0x0e93, + 0x0e98, 0x0e98, + 0x0ea0, 0x0ea0, + 0x0ea4, 0x0ea4, + 0x0ea6, 0x0ea6, + 0x0ea8, 0x0ea9, + 0x0eac, 0x0eac, + 0x0eba, 0x0eba, + 0x0ebe, 0x0ebf, + 0x0ec5, 0x0ec5, + 0x0ec7, 0x0ec7, + 0x0ece, 0x0ecf, + 0x0eda, 0x0edb, + 0x0ede, 0x0eff, + 0x0f48, 0x0f48, + 0x0f6b, 0x0f70, + 0x0f8c, 0x0f8f, + 0x0f98, 0x0f98, + 0x0fbd, 0x0fbd, + 0x0fcd, 0x0fce, + 0x0fd2, 0x0fff, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102b, 0x102b, + 0x1033, 0x1035, + 0x103a, 0x103f, + 0x105a, 0x109f, + 0x10c6, 0x10cf, + 0x10fd, 0x10ff, + 0x115a, 0x115e, + 0x11a3, 0x11a7, + 0x11fa, 0x11ff, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135e, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x1400, + 0x1677, 0x167f, + 0x169d, 0x169f, + 0x16f1, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17b4, 0x17b5, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18aa, 0x18ff, + 0x191d, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19aa, 0x19af, + 0x19ca, 0x19cf, + 0x19da, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a20, 0x1cff, + 0x1dc4, 0x1dff, + 0x1e9c, 0x1e9f, + 0x1efa, 0x1eff, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x206f, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x2095, 0x209f, + 0x20b6, 0x20cf, + 0x20ec, 0x20ff, + 0x214d, 0x2152, + 0x2184, 0x218f, + 0x23dc, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x269d, 0x269f, + 0x26b2, 0x2700, + 0x2705, 0x2705, + 0x270a, 0x270b, + 0x2728, 0x2728, + 0x274c, 0x274c, + 0x274e, 0x274e, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275f, 0x2760, + 0x2795, 0x2797, + 0x27b0, 0x27b0, + 0x27bf, 0x27bf, + 0x27c7, 0x27cf, + 0x27ec, 0x27ef, + 0x2b14, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c7f, + 0x2ceb, 0x2cf8, + 0x2d26, 0x2d2f, + 0x2d66, 0x2d6e, + 0x2d70, 0x2d7f, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2dff, + 0x2e18, 0x2e1b, + 0x2e1e, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312d, 0x3130, + 0x318f, 0x318f, + 0x31b8, 0x31bf, + 0x31d0, 0x31ef, + 0x321f, 0x321f, + 0x3244, 0x324f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fbc, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa6ff, + 0xa717, 0xa7ff, + 0xa82c, 0xabff, + 0xd7a4, 0xf8ff, + 0xfa2e, 0xfa2f, + 0xfa6b, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbb2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe24, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfffb, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018b, 0x102ff, + 0x1031f, 0x1031f, + 0x10324, 0x1032f, + 0x1034b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10840, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d129, + 0x1d173, 0x1d17a, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7ca, 0x1d7cd, + 0x1d800, 0x1ffff, + 0x2a6d7, 0x2f7ff, + 0x2fa1e, 0xe00ff, + 0xe01f0, 0x10ffff +}; /* CR_C */ + +/* 'Cc': General Category */ +static const OnigCodePoint CR_Cc[] = { + 2, + 0x0000, 0x001f, + 0x007f, 0x009f +}; /* CR_Cc */ + +/* 'Cf': General Category */ +static const OnigCodePoint CR_Cf[] = { + 14, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f +}; /* CR_Cf */ + +/* 'Cn': General Category */ +static const OnigCodePoint CR_Cn[] = { + 420, + 0x0242, 0x024f, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037b, 0x037d, + 0x037f, 0x0383, + 0x038b, 0x038b, + 0x038d, 0x038d, + 0x03a2, 0x03a2, + 0x03cf, 0x03cf, + 0x0487, 0x0487, + 0x04cf, 0x04cf, + 0x04fa, 0x04ff, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058b, 0x0590, + 0x05ba, 0x05ba, + 0x05c8, 0x05cf, + 0x05eb, 0x05ef, + 0x05f5, 0x05ff, + 0x0604, 0x060a, + 0x0616, 0x061a, + 0x061c, 0x061d, + 0x0620, 0x0620, + 0x063b, 0x063f, + 0x065f, 0x065f, + 0x070e, 0x070e, + 0x074b, 0x074c, + 0x076e, 0x077f, + 0x07b2, 0x0900, + 0x093a, 0x093b, + 0x094e, 0x094f, + 0x0955, 0x0957, + 0x0971, 0x097c, + 0x097e, 0x0980, + 0x0984, 0x0984, + 0x098d, 0x098e, + 0x0991, 0x0992, + 0x09a9, 0x09a9, + 0x09b1, 0x09b1, + 0x09b3, 0x09b5, + 0x09ba, 0x09bb, + 0x09c5, 0x09c6, + 0x09c9, 0x09ca, + 0x09cf, 0x09d6, + 0x09d8, 0x09db, + 0x09de, 0x09de, + 0x09e4, 0x09e5, + 0x09fb, 0x0a00, + 0x0a04, 0x0a04, + 0x0a0b, 0x0a0e, + 0x0a11, 0x0a12, + 0x0a29, 0x0a29, + 0x0a31, 0x0a31, + 0x0a34, 0x0a34, + 0x0a37, 0x0a37, + 0x0a3a, 0x0a3b, + 0x0a3d, 0x0a3d, + 0x0a43, 0x0a46, + 0x0a49, 0x0a4a, + 0x0a4e, 0x0a58, + 0x0a5d, 0x0a5d, + 0x0a5f, 0x0a65, + 0x0a75, 0x0a80, + 0x0a84, 0x0a84, + 0x0a8e, 0x0a8e, + 0x0a92, 0x0a92, + 0x0aa9, 0x0aa9, + 0x0ab1, 0x0ab1, + 0x0ab4, 0x0ab4, + 0x0aba, 0x0abb, + 0x0ac6, 0x0ac6, + 0x0aca, 0x0aca, + 0x0ace, 0x0acf, + 0x0ad1, 0x0adf, + 0x0ae4, 0x0ae5, + 0x0af0, 0x0af0, + 0x0af2, 0x0b00, + 0x0b04, 0x0b04, + 0x0b0d, 0x0b0e, + 0x0b11, 0x0b12, + 0x0b29, 0x0b29, + 0x0b31, 0x0b31, + 0x0b34, 0x0b34, + 0x0b3a, 0x0b3b, + 0x0b44, 0x0b46, + 0x0b49, 0x0b4a, + 0x0b4e, 0x0b55, + 0x0b58, 0x0b5b, + 0x0b5e, 0x0b5e, + 0x0b62, 0x0b65, + 0x0b72, 0x0b81, + 0x0b84, 0x0b84, + 0x0b8b, 0x0b8d, + 0x0b91, 0x0b91, + 0x0b96, 0x0b98, + 0x0b9b, 0x0b9b, + 0x0b9d, 0x0b9d, + 0x0ba0, 0x0ba2, + 0x0ba5, 0x0ba7, + 0x0bab, 0x0bad, + 0x0bba, 0x0bbd, + 0x0bc3, 0x0bc5, + 0x0bc9, 0x0bc9, + 0x0bce, 0x0bd6, + 0x0bd8, 0x0be5, + 0x0bfb, 0x0c00, + 0x0c04, 0x0c04, + 0x0c0d, 0x0c0d, + 0x0c11, 0x0c11, + 0x0c29, 0x0c29, + 0x0c34, 0x0c34, + 0x0c3a, 0x0c3d, + 0x0c45, 0x0c45, + 0x0c49, 0x0c49, + 0x0c4e, 0x0c54, + 0x0c57, 0x0c5f, + 0x0c62, 0x0c65, + 0x0c70, 0x0c81, + 0x0c84, 0x0c84, + 0x0c8d, 0x0c8d, + 0x0c91, 0x0c91, + 0x0ca9, 0x0ca9, + 0x0cb4, 0x0cb4, + 0x0cba, 0x0cbb, + 0x0cc5, 0x0cc5, + 0x0cc9, 0x0cc9, + 0x0cce, 0x0cd4, + 0x0cd7, 0x0cdd, + 0x0cdf, 0x0cdf, + 0x0ce2, 0x0ce5, + 0x0cf0, 0x0d01, + 0x0d04, 0x0d04, + 0x0d0d, 0x0d0d, + 0x0d11, 0x0d11, + 0x0d29, 0x0d29, + 0x0d3a, 0x0d3d, + 0x0d44, 0x0d45, + 0x0d49, 0x0d49, + 0x0d4e, 0x0d56, + 0x0d58, 0x0d5f, + 0x0d62, 0x0d65, + 0x0d70, 0x0d81, + 0x0d84, 0x0d84, + 0x0d97, 0x0d99, + 0x0db2, 0x0db2, + 0x0dbc, 0x0dbc, + 0x0dbe, 0x0dbf, + 0x0dc7, 0x0dc9, + 0x0dcb, 0x0dce, + 0x0dd5, 0x0dd5, + 0x0dd7, 0x0dd7, + 0x0de0, 0x0df1, + 0x0df5, 0x0e00, + 0x0e3b, 0x0e3e, + 0x0e5c, 0x0e80, + 0x0e83, 0x0e83, + 0x0e85, 0x0e86, + 0x0e89, 0x0e89, + 0x0e8b, 0x0e8c, + 0x0e8e, 0x0e93, + 0x0e98, 0x0e98, + 0x0ea0, 0x0ea0, + 0x0ea4, 0x0ea4, + 0x0ea6, 0x0ea6, + 0x0ea8, 0x0ea9, + 0x0eac, 0x0eac, + 0x0eba, 0x0eba, + 0x0ebe, 0x0ebf, + 0x0ec5, 0x0ec5, + 0x0ec7, 0x0ec7, + 0x0ece, 0x0ecf, + 0x0eda, 0x0edb, + 0x0ede, 0x0eff, + 0x0f48, 0x0f48, + 0x0f6b, 0x0f70, + 0x0f8c, 0x0f8f, + 0x0f98, 0x0f98, + 0x0fbd, 0x0fbd, + 0x0fcd, 0x0fce, + 0x0fd2, 0x0fff, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102b, 0x102b, + 0x1033, 0x1035, + 0x103a, 0x103f, + 0x105a, 0x109f, + 0x10c6, 0x10cf, + 0x10fd, 0x10ff, + 0x115a, 0x115e, + 0x11a3, 0x11a7, + 0x11fa, 0x11ff, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135e, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x1400, + 0x1677, 0x167f, + 0x169d, 0x169f, + 0x16f1, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18aa, 0x18ff, + 0x191d, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19aa, 0x19af, + 0x19ca, 0x19cf, + 0x19da, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a20, 0x1cff, + 0x1dc4, 0x1dff, + 0x1e9c, 0x1e9f, + 0x1efa, 0x1eff, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x2064, 0x2069, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x2095, 0x209f, + 0x20b6, 0x20cf, + 0x20ec, 0x20ff, + 0x214d, 0x2152, + 0x2184, 0x218f, + 0x23dc, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x269d, 0x269f, + 0x26b2, 0x2700, + 0x2705, 0x2705, + 0x270a, 0x270b, + 0x2728, 0x2728, + 0x274c, 0x274c, + 0x274e, 0x274e, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275f, 0x2760, + 0x2795, 0x2797, + 0x27b0, 0x27b0, + 0x27bf, 0x27bf, + 0x27c7, 0x27cf, + 0x27ec, 0x27ef, + 0x2b14, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c7f, + 0x2ceb, 0x2cf8, + 0x2d26, 0x2d2f, + 0x2d66, 0x2d6e, + 0x2d70, 0x2d7f, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2dff, + 0x2e18, 0x2e1b, + 0x2e1e, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312d, 0x3130, + 0x318f, 0x318f, + 0x31b8, 0x31bf, + 0x31d0, 0x31ef, + 0x321f, 0x321f, + 0x3244, 0x324f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fbc, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa6ff, + 0xa717, 0xa7ff, + 0xa82c, 0xabff, + 0xd7a4, 0xd7ff, + 0xfa2e, 0xfa2f, + 0xfa6b, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbb2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe24, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xfefe, + 0xff00, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfff8, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018b, 0x102ff, + 0x1031f, 0x1031f, + 0x10324, 0x1032f, + 0x1034b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10840, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d129, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7ca, 0x1d7cd, + 0x1d800, 0x1ffff, + 0x2a6d7, 0x2f7ff, + 0x2fa1e, 0xe0000, + 0xe0002, 0xe001f, + 0xe0080, 0xe00ff, + 0xe01f0, 0xeffff, + 0xffffe, 0xfffff, + 0x10fffe, 0x10ffff +}; /* CR_Cn */ + +/* 'Co': General Category */ +static const OnigCodePoint CR_Co[] = { + 3, + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Co */ + +/* 'Cs': General Category */ +static const OnigCodePoint CR_Cs[] = { + 1, + 0xd800, 0xdfff +}; /* CR_Cs */ + +/* 'L': Major Category */ +static const OnigCodePoint CR_L[] = { + 347, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063a, + 0x0640, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06e5, 0x06e6, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x076d, + 0x0780, 0x07a5, + 0x07b1, 0x07b1, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x097d, 0x097d, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d60, 0x0d61, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e46, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f88, 0x0f8b, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x1050, 0x1055, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1d00, 0x1dbf, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_L */ + +/* 'Ll': General Category */ +static const OnigCodePoint CR_Ll[] = { + 480, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x2c30, 0x2c5e, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +}; /* CR_Ll */ + +/* 'Lm': General Category */ +static const OnigCodePoint CR_Lm[] = { + 26, + 0x02b0, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x037a, 0x037a, + 0x0559, 0x0559, + 0x0640, 0x0640, + 0x06e5, 0x06e6, + 0x0e46, 0x0e46, + 0x0ec6, 0x0ec6, + 0x10fc, 0x10fc, + 0x17d7, 0x17d7, + 0x1843, 0x1843, + 0x1d2c, 0x1d61, + 0x1d78, 0x1d78, + 0x1d9b, 0x1dbf, + 0x2090, 0x2094, + 0x2d6f, 0x2d6f, + 0x3005, 0x3005, + 0x3031, 0x3035, + 0x303b, 0x303b, + 0x309d, 0x309e, + 0x30fc, 0x30fe, + 0xa015, 0xa015, + 0xff70, 0xff70, + 0xff9e, 0xff9f +}; /* CR_Lm */ + +/* 'Lo': General Category */ +static const OnigCodePoint CR_Lo[] = { + 245, + 0x01bb, 0x01bb, + 0x01c0, 0x01c3, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063a, + 0x0641, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x076d, + 0x0780, 0x07a5, + 0x07b1, 0x07b1, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x097d, 0x097d, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d60, 0x0d61, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e45, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f88, 0x0f8b, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x1050, 0x1055, + 0x10d0, 0x10fa, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17dc, 0x17dc, + 0x1820, 0x1842, + 0x1844, 0x1877, + 0x1880, 0x18a8, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x2135, 0x2138, + 0x2d30, 0x2d65, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3006, 0x3006, + 0x303c, 0x303c, + 0x3041, 0x3096, + 0x309f, 0x309f, + 0x30a1, 0x30fa, + 0x30ff, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa014, + 0xa016, 0xa48c, + 0xa800, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff66, 0xff6f, + 0xff71, 0xff9d, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10450, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_Lo */ + +/* 'Lt': General Category */ +static const OnigCodePoint CR_Lt[] = { + 10, + 0x01c5, 0x01c5, + 0x01c8, 0x01c8, + 0x01cb, 0x01cb, + 0x01f2, 0x01f2, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fbc, 0x1fbc, + 0x1fcc, 0x1fcc, + 0x1ffc, 0x1ffc +}; /* CR_Lt */ + +/* 'Lu': General Category */ +static const OnigCodePoint CR_Lu[] = { + 476, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2c00, 0x2c2e, + 0x2c80, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +}; /* CR_Lu */ + +/* 'M': Major Category */ +static const OnigCodePoint CR_M[] = { + 133, + 0x0300, 0x036f, + 0x0483, 0x0486, + 0x0488, 0x0489, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x0615, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06de, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x0901, 0x0903, + 0x093c, 0x093c, + 0x093e, 0x094d, + 0x0951, 0x0954, + 0x0962, 0x0963, + 0x0981, 0x0983, + 0x09bc, 0x09bc, + 0x09be, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09e2, 0x09e3, + 0x0a01, 0x0a03, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a70, 0x0a71, + 0x0a81, 0x0a83, + 0x0abc, 0x0abc, + 0x0abe, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b3c, 0x0b3c, + 0x0b3e, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b82, 0x0b82, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c82, 0x0c83, + 0x0cbc, 0x0cbc, + 0x0cbe, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d82, 0x0d83, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f3f, + 0x0f71, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1056, 0x1059, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b6, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a17, 0x1a1b, + 0x1dc0, 0x1dc3, + 0x20d0, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa802, 0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa823, 0xa827, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef +}; /* CR_M */ + +/* 'Mc': General Category */ +static const OnigCodePoint CR_Mc[] = { + 63, + 0x0903, 0x0903, + 0x093e, 0x0940, + 0x0949, 0x094c, + 0x0982, 0x0983, + 0x09be, 0x09c0, + 0x09c7, 0x09c8, + 0x09cb, 0x09cc, + 0x09d7, 0x09d7, + 0x0a03, 0x0a03, + 0x0a3e, 0x0a40, + 0x0a83, 0x0a83, + 0x0abe, 0x0ac0, + 0x0ac9, 0x0ac9, + 0x0acb, 0x0acc, + 0x0b02, 0x0b03, + 0x0b3e, 0x0b3e, + 0x0b40, 0x0b40, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4c, + 0x0b57, 0x0b57, + 0x0bbe, 0x0bbf, + 0x0bc1, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcc, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c41, 0x0c44, + 0x0c82, 0x0c83, + 0x0cbe, 0x0cbe, + 0x0cc0, 0x0cc4, + 0x0cc7, 0x0cc8, + 0x0cca, 0x0ccb, + 0x0cd5, 0x0cd6, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d40, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4c, + 0x0d57, 0x0d57, + 0x0d82, 0x0d83, + 0x0dcf, 0x0dd1, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0f3e, 0x0f3f, + 0x0f7f, 0x0f7f, + 0x102c, 0x102c, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x1056, 0x1057, + 0x17b6, 0x17b6, + 0x17be, 0x17c5, + 0x17c7, 0x17c8, + 0x1923, 0x1926, + 0x1929, 0x192b, + 0x1930, 0x1931, + 0x1933, 0x1938, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a19, 0x1a1b, + 0xa802, 0xa802, + 0xa823, 0xa824, + 0xa827, 0xa827, + 0x1d165, 0x1d166, + 0x1d16d, 0x1d172 +}; /* CR_Mc */ + +/* 'Me': General Category */ +static const OnigCodePoint CR_Me[] = { + 4, + 0x0488, 0x0489, + 0x06de, 0x06de, + 0x20dd, 0x20e0, + 0x20e2, 0x20e4 +}; /* CR_Me */ + +/* 'Mn': General Category */ +static const OnigCodePoint CR_Mn[] = { + 124, + 0x0300, 0x036f, + 0x0483, 0x0486, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x0615, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06df, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x0901, 0x0902, + 0x093c, 0x093c, + 0x0941, 0x0948, + 0x094d, 0x094d, + 0x0951, 0x0954, + 0x0962, 0x0963, + 0x0981, 0x0981, + 0x09bc, 0x09bc, + 0x09c1, 0x09c4, + 0x09cd, 0x09cd, + 0x09e2, 0x09e3, + 0x0a01, 0x0a02, + 0x0a3c, 0x0a3c, + 0x0a41, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a70, 0x0a71, + 0x0a81, 0x0a82, + 0x0abc, 0x0abc, + 0x0ac1, 0x0ac5, + 0x0ac7, 0x0ac8, + 0x0acd, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b01, + 0x0b3c, 0x0b3c, + 0x0b3f, 0x0b3f, + 0x0b41, 0x0b43, + 0x0b4d, 0x0b4d, + 0x0b56, 0x0b56, + 0x0b82, 0x0b82, + 0x0bc0, 0x0bc0, + 0x0bcd, 0x0bcd, + 0x0c3e, 0x0c40, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0cbc, 0x0cbc, + 0x0cbf, 0x0cbf, + 0x0cc6, 0x0cc6, + 0x0ccc, 0x0ccd, + 0x0d41, 0x0d43, + 0x0d4d, 0x0d4d, + 0x0dca, 0x0dca, + 0x0dd2, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f71, 0x0f7e, + 0x0f80, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102d, 0x1030, + 0x1032, 0x1032, + 0x1036, 0x1037, + 0x1039, 0x1039, + 0x1058, 0x1059, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b7, 0x17bd, + 0x17c6, 0x17c6, + 0x17c9, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193b, + 0x1a17, 0x1a18, + 0x1dc0, 0x1dc3, + 0x20d0, 0x20dc, + 0x20e1, 0x20e1, + 0x20e5, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa825, 0xa826, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef +}; /* CR_Mn */ + +/* 'N': Major Category */ +static const OnigCodePoint CR_N[] = { + 53, + 0x0030, 0x0039, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x09f4, 0x09f9, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bf2, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f33, + 0x1040, 0x1049, + 0x1369, 0x137c, + 0x16ee, 0x16f0, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0xff10, 0xff19, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5, + 0x104a0, 0x104a9, + 0x10a40, 0x10a47, + 0x1d7ce, 0x1d7ff +}; /* CR_N */ + +/* 'Nd': General Category */ +static const OnigCodePoint CR_Nd[] = { + 23, + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +}; /* CR_Nd */ + +/* 'Nl': General Category */ +static const OnigCodePoint CR_Nl[] = { + 8, + 0x16ee, 0x16f0, + 0x2160, 0x2183, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x10140, 0x10174, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5 +}; /* CR_Nl */ + +/* 'No': General Category */ +static const OnigCodePoint CR_No[] = { + 26, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x09f4, 0x09f9, + 0x0bf0, 0x0bf2, + 0x0f2a, 0x0f33, + 0x1369, 0x137c, + 0x17f0, 0x17f9, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2153, 0x215f, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x10107, 0x10133, + 0x10175, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x10a40, 0x10a47 +}; /* CR_No */ + +/* 'P': Major Category */ +static const OnigCodePoint CR_P[] = { + 96, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d, + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x205e, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27c5, 0x27c6, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_P */ + +/* 'Pc': General Category */ +static const OnigCodePoint CR_Pc[] = { + 6, + 0x005f, 0x005f, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xff3f, 0xff3f +}; /* CR_Pc */ + +/* 'Pd': General Category */ +static const OnigCodePoint CR_Pd[] = { + 12, + 0x002d, 0x002d, + 0x058a, 0x058a, + 0x1806, 0x1806, + 0x2010, 0x2015, + 0x2e17, 0x2e17, + 0x301c, 0x301c, + 0x3030, 0x3030, + 0x30a0, 0x30a0, + 0xfe31, 0xfe32, + 0xfe58, 0xfe58, + 0xfe63, 0xfe63, + 0xff0d, 0xff0d +}; /* CR_Pd */ + +/* 'Pe': General Category */ +static const OnigCodePoint CR_Pe[] = { + 65, + 0x0029, 0x0029, + 0x005d, 0x005d, + 0x007d, 0x007d, + 0x0f3b, 0x0f3b, + 0x0f3d, 0x0f3d, + 0x169c, 0x169c, + 0x2046, 0x2046, + 0x207e, 0x207e, + 0x208e, 0x208e, + 0x232a, 0x232a, + 0x23b5, 0x23b5, + 0x2769, 0x2769, + 0x276b, 0x276b, + 0x276d, 0x276d, + 0x276f, 0x276f, + 0x2771, 0x2771, + 0x2773, 0x2773, + 0x2775, 0x2775, + 0x27c6, 0x27c6, + 0x27e7, 0x27e7, + 0x27e9, 0x27e9, + 0x27eb, 0x27eb, + 0x2984, 0x2984, + 0x2986, 0x2986, + 0x2988, 0x2988, + 0x298a, 0x298a, + 0x298c, 0x298c, + 0x298e, 0x298e, + 0x2990, 0x2990, + 0x2992, 0x2992, + 0x2994, 0x2994, + 0x2996, 0x2996, + 0x2998, 0x2998, + 0x29d9, 0x29d9, + 0x29db, 0x29db, + 0x29fd, 0x29fd, + 0x3009, 0x3009, + 0x300b, 0x300b, + 0x300d, 0x300d, + 0x300f, 0x300f, + 0x3011, 0x3011, + 0x3015, 0x3015, + 0x3017, 0x3017, + 0x3019, 0x3019, + 0x301b, 0x301b, + 0x301e, 0x301f, + 0xfd3f, 0xfd3f, + 0xfe18, 0xfe18, + 0xfe36, 0xfe36, + 0xfe38, 0xfe38, + 0xfe3a, 0xfe3a, + 0xfe3c, 0xfe3c, + 0xfe3e, 0xfe3e, + 0xfe40, 0xfe40, + 0xfe42, 0xfe42, + 0xfe44, 0xfe44, + 0xfe48, 0xfe48, + 0xfe5a, 0xfe5a, + 0xfe5c, 0xfe5c, + 0xfe5e, 0xfe5e, + 0xff09, 0xff09, + 0xff3d, 0xff3d, + 0xff5d, 0xff5d, + 0xff60, 0xff60, + 0xff63, 0xff63 +}; /* CR_Pe */ + +/* 'Pf': General Category */ +static const OnigCodePoint CR_Pf[] = { + 9, + 0x00bb, 0x00bb, + 0x2019, 0x2019, + 0x201d, 0x201d, + 0x203a, 0x203a, + 0x2e03, 0x2e03, + 0x2e05, 0x2e05, + 0x2e0a, 0x2e0a, + 0x2e0d, 0x2e0d, + 0x2e1d, 0x2e1d +}; /* CR_Pf */ + +/* 'Pi': General Category */ +static const OnigCodePoint CR_Pi[] = { + 10, + 0x00ab, 0x00ab, + 0x2018, 0x2018, + 0x201b, 0x201c, + 0x201f, 0x201f, + 0x2039, 0x2039, + 0x2e02, 0x2e02, + 0x2e04, 0x2e04, + 0x2e09, 0x2e09, + 0x2e0c, 0x2e0c, + 0x2e1c, 0x2e1c +}; /* CR_Pi */ + +/* 'Po': General Category */ +static const OnigCodePoint CR_Po[] = { + 88, + 0x0021, 0x0023, + 0x0025, 0x0027, + 0x002a, 0x002a, + 0x002c, 0x002c, + 0x002e, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005c, 0x005c, + 0x00a1, 0x00a1, + 0x00b7, 0x00b7, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x0589, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x1805, + 0x1807, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2016, 0x2017, + 0x2020, 0x2027, + 0x2030, 0x2038, + 0x203b, 0x203e, + 0x2041, 0x2043, + 0x2047, 0x2051, + 0x2053, 0x2053, + 0x2055, 0x205e, + 0x23b6, 0x23b6, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e01, + 0x2e06, 0x2e08, + 0x2e0b, 0x2e0b, + 0x2e0e, 0x2e16, + 0x3001, 0x3003, + 0x303d, 0x303d, + 0x30fb, 0x30fb, + 0xfe10, 0xfe16, + 0xfe19, 0xfe19, + 0xfe30, 0xfe30, + 0xfe45, 0xfe46, + 0xfe49, 0xfe4c, + 0xfe50, 0xfe52, + 0xfe54, 0xfe57, + 0xfe5f, 0xfe61, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff07, + 0xff0a, 0xff0a, + 0xff0c, 0xff0c, + 0xff0e, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3c, 0xff3c, + 0xff61, 0xff61, + 0xff64, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_Po */ + +/* 'Ps': General Category */ +static const OnigCodePoint CR_Ps[] = { + 67, + 0x0028, 0x0028, + 0x005b, 0x005b, + 0x007b, 0x007b, + 0x0f3a, 0x0f3a, + 0x0f3c, 0x0f3c, + 0x169b, 0x169b, + 0x201a, 0x201a, + 0x201e, 0x201e, + 0x2045, 0x2045, + 0x207d, 0x207d, + 0x208d, 0x208d, + 0x2329, 0x2329, + 0x23b4, 0x23b4, + 0x2768, 0x2768, + 0x276a, 0x276a, + 0x276c, 0x276c, + 0x276e, 0x276e, + 0x2770, 0x2770, + 0x2772, 0x2772, + 0x2774, 0x2774, + 0x27c5, 0x27c5, + 0x27e6, 0x27e6, + 0x27e8, 0x27e8, + 0x27ea, 0x27ea, + 0x2983, 0x2983, + 0x2985, 0x2985, + 0x2987, 0x2987, + 0x2989, 0x2989, + 0x298b, 0x298b, + 0x298d, 0x298d, + 0x298f, 0x298f, + 0x2991, 0x2991, + 0x2993, 0x2993, + 0x2995, 0x2995, + 0x2997, 0x2997, + 0x29d8, 0x29d8, + 0x29da, 0x29da, + 0x29fc, 0x29fc, + 0x3008, 0x3008, + 0x300a, 0x300a, + 0x300c, 0x300c, + 0x300e, 0x300e, + 0x3010, 0x3010, + 0x3014, 0x3014, + 0x3016, 0x3016, + 0x3018, 0x3018, + 0x301a, 0x301a, + 0x301d, 0x301d, + 0xfd3e, 0xfd3e, + 0xfe17, 0xfe17, + 0xfe35, 0xfe35, + 0xfe37, 0xfe37, + 0xfe39, 0xfe39, + 0xfe3b, 0xfe3b, + 0xfe3d, 0xfe3d, + 0xfe3f, 0xfe3f, + 0xfe41, 0xfe41, + 0xfe43, 0xfe43, + 0xfe47, 0xfe47, + 0xfe59, 0xfe59, + 0xfe5b, 0xfe5b, + 0xfe5d, 0xfe5d, + 0xff08, 0xff08, + 0xff3b, 0xff3b, + 0xff5b, 0xff5b, + 0xff5f, 0xff5f, + 0xff62, 0xff62 +}; /* CR_Ps */ + +/* 'S': Major Category */ +static const OnigCodePoint CR_S[] = { + 162, + 0x0024, 0x0024, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00a2, 0x00a9, + 0x00ac, 0x00ac, + 0x00ae, 0x00b1, + 0x00b4, 0x00b4, + 0x00b6, 0x00b6, + 0x00b8, 0x00b8, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x02c2, 0x02c5, + 0x02d2, 0x02df, + 0x02e5, 0x02ed, + 0x02ef, 0x02ff, + 0x0374, 0x0375, + 0x0384, 0x0385, + 0x03f6, 0x03f6, + 0x0482, 0x0482, + 0x060b, 0x060b, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x09f2, 0x09f3, + 0x09fa, 0x09fa, + 0x0af1, 0x0af1, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bfa, + 0x0e3f, 0x0e3f, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x17db, 0x17db, + 0x1940, 0x1940, + 0x19e0, 0x19ff, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x20a0, 0x20b5, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x2132, 0x2132, + 0x213a, 0x213b, + 0x2140, 0x2144, + 0x214a, 0x214c, + 0x2190, 0x2328, + 0x232b, 0x23b3, + 0x23b7, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c4, + 0x27d0, 0x27e5, + 0x27f0, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2b13, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x309b, 0x309c, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31cf, + 0x3200, 0x321e, + 0x322a, 0x3243, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa828, 0xa82b, + 0xfb29, 0xfb29, + 0xfdfc, 0xfdfd, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x103d0, 0x103d0, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3 +}; /* CR_S */ + +/* 'Sc': General Category */ +static const OnigCodePoint CR_Sc[] = { + 14, + 0x0024, 0x0024, + 0x00a2, 0x00a5, + 0x060b, 0x060b, + 0x09f2, 0x09f3, + 0x0af1, 0x0af1, + 0x0bf9, 0x0bf9, + 0x0e3f, 0x0e3f, + 0x17db, 0x17db, + 0x20a0, 0x20b5, + 0xfdfc, 0xfdfc, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xffe0, 0xffe1, + 0xffe5, 0xffe6 +}; /* CR_Sc */ + +/* 'Sk': General Category */ +static const OnigCodePoint CR_Sk[] = { + 23, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x00a8, 0x00a8, + 0x00af, 0x00af, + 0x00b4, 0x00b4, + 0x00b8, 0x00b8, + 0x02c2, 0x02c5, + 0x02d2, 0x02df, + 0x02e5, 0x02ed, + 0x02ef, 0x02ff, + 0x0374, 0x0375, + 0x0384, 0x0385, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x309b, 0x309c, + 0xa700, 0xa716, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xffe3, 0xffe3 +}; /* CR_Sk */ + +/* 'Sm': General Category */ +static const OnigCodePoint CR_Sm[] = { + 59, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00ac, 0x00ac, + 0x00b1, 0x00b1, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x03f6, 0x03f6, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x2140, 0x2144, + 0x214b, 0x214b, + 0x2190, 0x2194, + 0x219a, 0x219b, + 0x21a0, 0x21a0, + 0x21a3, 0x21a3, + 0x21a6, 0x21a6, + 0x21ae, 0x21ae, + 0x21ce, 0x21cf, + 0x21d2, 0x21d2, + 0x21d4, 0x21d4, + 0x21f4, 0x22ff, + 0x2308, 0x230b, + 0x2320, 0x2321, + 0x237c, 0x237c, + 0x239b, 0x23b3, + 0x25b7, 0x25b7, + 0x25c1, 0x25c1, + 0x25f8, 0x25ff, + 0x266f, 0x266f, + 0x27c0, 0x27c4, + 0x27d0, 0x27e5, + 0x27f0, 0x27ff, + 0x2900, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2aff, + 0xfb29, 0xfb29, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe2, 0xffe2, + 0xffe9, 0xffec, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3 +}; /* CR_Sm */ + +/* 'So': General Category */ +static const OnigCodePoint CR_So[] = { + 120, + 0x00a6, 0x00a7, + 0x00a9, 0x00a9, + 0x00ae, 0x00ae, + 0x00b0, 0x00b0, + 0x00b6, 0x00b6, + 0x0482, 0x0482, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x09fa, 0x09fa, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bf8, + 0x0bfa, 0x0bfa, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x1940, 0x1940, + 0x19e0, 0x19ff, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x2132, 0x2132, + 0x213a, 0x213b, + 0x214a, 0x214a, + 0x214c, 0x214c, + 0x2195, 0x2199, + 0x219c, 0x219f, + 0x21a1, 0x21a2, + 0x21a4, 0x21a5, + 0x21a7, 0x21ad, + 0x21af, 0x21cd, + 0x21d0, 0x21d1, + 0x21d3, 0x21d3, + 0x21d5, 0x21f3, + 0x2300, 0x2307, + 0x230c, 0x231f, + 0x2322, 0x2328, + 0x232b, 0x237b, + 0x237d, 0x239a, + 0x23b7, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x25b6, + 0x25b8, 0x25c0, + 0x25c2, 0x25f7, + 0x2600, 0x266e, + 0x2670, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x2800, 0x28ff, + 0x2b00, 0x2b13, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31cf, + 0x3200, 0x321e, + 0x322a, 0x3243, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa828, 0xa82b, + 0xfdfd, 0xfdfd, + 0xffe4, 0xffe4, + 0xffe8, 0xffe8, + 0xffed, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x103d0, 0x103d0, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356 +}; /* CR_So */ + +/* 'Z': Major Category */ +static const OnigCodePoint CR_Z[] = { + 9, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Z */ + +/* 'Zl': General Category */ +static const OnigCodePoint CR_Zl[] = { + 1, + 0x2028, 0x2028 +}; /* CR_Zl */ + +/* 'Zp': General Category */ +static const OnigCodePoint CR_Zp[] = { + 1, + 0x2029, 0x2029 +}; /* CR_Zp */ + +/* 'Zs': General Category */ +static const OnigCodePoint CR_Zs[] = { + 8, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Zs */ + +/* 'Arabic': Script */ +static const OnigCodePoint CR_Arabic[] = { + 17, + 0x060b, 0x060b, + 0x060d, 0x0615, + 0x061e, 0x061e, + 0x0621, 0x063a, + 0x0641, 0x064a, + 0x0656, 0x065e, + 0x066a, 0x066f, + 0x0671, 0x06dc, + 0x06de, 0x06ff, + 0x0750, 0x076d, + 0xfb50, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfc, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc +}; /* CR_Arabic */ + +/* 'Armenian': Script */ +static const OnigCodePoint CR_Armenian[] = { + 5, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x058a, 0x058a, + 0xfb13, 0xfb17 +}; /* CR_Armenian */ + +/* 'Bengali': Script */ +static const OnigCodePoint CR_Bengali[] = { + 14, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa +}; /* CR_Bengali */ + +/* 'Bopomofo': Script */ +static const OnigCodePoint CR_Bopomofo[] = { + 2, + 0x3105, 0x312c, + 0x31a0, 0x31b7 +}; /* CR_Bopomofo */ + +/* 'Braille': Script */ +static const OnigCodePoint CR_Braille[] = { + 1, + 0x2800, 0x28ff +}; /* CR_Braille */ + +/* 'Buginese': Script */ +static const OnigCodePoint CR_Buginese[] = { + 2, + 0x1a00, 0x1a1b, + 0x1a1e, 0x1a1f +}; /* CR_Buginese */ + +/* 'Buhid': Script */ +static const OnigCodePoint CR_Buhid[] = { + 1, + 0x1740, 0x1753 +}; /* CR_Buhid */ + +/* 'Canadian_Aboriginal': Script */ +static const OnigCodePoint CR_Canadian_Aboriginal[] = { + 1, + 0x1401, 0x1676 +}; /* CR_Canadian_Aboriginal */ + +/* 'Cherokee': Script */ +static const OnigCodePoint CR_Cherokee[] = { + 1, + 0x13a0, 0x13f4 +}; /* CR_Cherokee */ + +/* 'Common': Script */ +static const OnigCodePoint CR_Common[] = { + 126, + 0x0000, 0x0040, + 0x005b, 0x0060, + 0x007b, 0x00a9, + 0x00ab, 0x00b9, + 0x00bb, 0x00bf, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x02b9, 0x02df, + 0x02e5, 0x02ff, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x0589, 0x0589, + 0x0600, 0x0603, + 0x060c, 0x060c, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0640, 0x0640, + 0x0660, 0x0669, + 0x06dd, 0x06dd, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0e3f, 0x0e3f, + 0x10fb, 0x10fb, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x2000, 0x200b, + 0x200e, 0x2063, + 0x206a, 0x2070, + 0x2074, 0x207e, + 0x2080, 0x208e, + 0x20a0, 0x20b5, + 0x2100, 0x2125, + 0x2127, 0x2129, + 0x212c, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x27ff, + 0x2900, 0x2b13, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2ff0, 0x2ffb, + 0x3000, 0x3004, + 0x3006, 0x3006, + 0x3008, 0x3020, + 0x3030, 0x3037, + 0x303c, 0x303f, + 0x309b, 0x309c, + 0x30a0, 0x30a0, + 0x30fb, 0x30fc, + 0x3190, 0x319f, + 0x31c0, 0x31cf, + 0x3220, 0x3243, + 0x3250, 0x325f, + 0x327e, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa700, 0xa716, + 0xe000, 0xf8ff, + 0xfd3e, 0xfd3f, + 0xfdfd, 0xfdfd, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfeff, 0xfeff, + 0xff01, 0xff20, + 0xff3b, 0xff40, + 0xff5b, 0xff65, + 0xff70, 0xff70, + 0xff9e, 0xff9f, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d166, + 0x1d16a, 0x1d17a, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Common */ + +/* 'Coptic': Script */ +static const OnigCodePoint CR_Coptic[] = { + 3, + 0x03e2, 0x03ef, + 0x2c80, 0x2cea, + 0x2cf9, 0x2cff +}; /* CR_Coptic */ + +/* 'Cypriot': Script */ +static const OnigCodePoint CR_Cypriot[] = { + 6, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f +}; /* CR_Cypriot */ + +/* 'Cyrillic': Script */ +static const OnigCodePoint CR_Cyrillic[] = { + 6, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x1d2b, 0x1d2b, + 0x1d78, 0x1d78 +}; /* CR_Cyrillic */ + +/* 'Deseret': Script */ +static const OnigCodePoint CR_Deseret[] = { + 1, + 0x10400, 0x1044f +}; /* CR_Deseret */ + +/* 'Devanagari': Script */ +static const OnigCodePoint CR_Devanagari[] = { + 6, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d +}; /* CR_Devanagari */ + +/* 'Ethiopic': Script */ +static const OnigCodePoint CR_Ethiopic[] = { + 27, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde +}; /* CR_Ethiopic */ + +/* 'Georgian': Script */ +static const OnigCodePoint CR_Georgian[] = { + 4, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x2d00, 0x2d25 +}; /* CR_Georgian */ + +/* 'Glagolitic': Script */ +static const OnigCodePoint CR_Glagolitic[] = { + 2, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e +}; /* CR_Glagolitic */ + +/* 'Gothic': Script */ +static const OnigCodePoint CR_Gothic[] = { + 1, + 0x10330, 0x1034a +}; /* CR_Gothic */ + +/* 'Greek': Script */ +static const OnigCodePoint CR_Greek[] = { + 31, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x0384, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03e1, + 0x03f0, 0x03ff, + 0x1d26, 0x1d2a, + 0x1d5d, 0x1d61, + 0x1d66, 0x1d6a, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2126, 0x2126, + 0x10140, 0x1018a, + 0x1d200, 0x1d245 +}; /* CR_Greek */ + +/* 'Gujarati': Script */ +static const OnigCodePoint CR_Gujarati[] = { + 14, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1 +}; /* CR_Gujarati */ + +/* 'Gurmukhi': Script */ +static const OnigCodePoint CR_Gurmukhi[] = { + 15, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74 +}; /* CR_Gurmukhi */ + +/* 'Han': Script */ +static const OnigCodePoint CR_Han[] = { + 14, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x3005, 0x3005, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303b, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_Han */ + +/* 'Hangul': Script */ +static const OnigCodePoint CR_Hangul[] = { + 12, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x3131, 0x318e, + 0x3200, 0x321e, + 0x3260, 0x327d, + 0xac00, 0xd7a3, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc +}; /* CR_Hangul */ + +/* 'Hanunoo': Script */ +static const OnigCodePoint CR_Hanunoo[] = { + 1, + 0x1720, 0x1734 +}; /* CR_Hanunoo */ + +/* 'Hebrew': Script */ +static const OnigCodePoint CR_Hebrew[] = { + 10, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfb4f +}; /* CR_Hebrew */ + +/* 'Hiragana': Script */ +static const OnigCodePoint CR_Hiragana[] = { + 2, + 0x3041, 0x3096, + 0x309d, 0x309f +}; /* CR_Hiragana */ + +/* 'Inherited': Script */ +static const OnigCodePoint CR_Inherited[] = { + 15, + 0x0300, 0x036f, + 0x064b, 0x0655, + 0x0670, 0x0670, + 0x1dc0, 0x1dc3, + 0x200c, 0x200d, + 0x20d0, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0xe0100, 0xe01ef +}; /* CR_Inherited */ + +/* 'Kannada': Script */ +static const OnigCodePoint CR_Kannada[] = { + 13, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef +}; /* CR_Kannada */ + +/* 'Katakana': Script */ +static const OnigCodePoint CR_Katakana[] = { + 5, + 0x30a1, 0x30fa, + 0x30fd, 0x30ff, + 0x31f0, 0x31ff, + 0xff66, 0xff6f, + 0xff71, 0xff9d +}; /* CR_Katakana */ + +/* 'Kharoshthi': Script */ +static const OnigCodePoint CR_Kharoshthi[] = { + 8, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58 +}; /* CR_Kharoshthi */ + +/* 'Khmer': Script */ +static const OnigCodePoint CR_Khmer[] = { + 4, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x19e0, 0x19ff +}; /* CR_Khmer */ + +/* 'Lao': Script */ +static const OnigCodePoint CR_Lao[] = { + 18, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd +}; /* CR_Lao */ + +/* 'Latin': Script */ +static const OnigCodePoint CR_Latin[] = { + 23, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02b8, + 0x02e0, 0x02e4, + 0x1d00, 0x1d25, + 0x1d2c, 0x1d5c, + 0x1d62, 0x1d65, + 0x1d6b, 0x1d77, + 0x1d79, 0x1dbf, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x212a, 0x212b, + 0xfb00, 0xfb06, + 0xff21, 0xff3a, + 0xff41, 0xff5a +}; /* CR_Latin */ + +/* 'Limbu': Script */ +static const OnigCodePoint CR_Limbu[] = { + 5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x194f +}; /* CR_Limbu */ + +/* 'Linear_B': Script */ +static const OnigCodePoint CR_Linear_B[] = { + 7, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa +}; /* CR_Linear_B */ + +/* 'Malayalam': Script */ +static const OnigCodePoint CR_Malayalam[] = { + 11, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f +}; /* CR_Malayalam */ + +/* 'Mongolian': Script */ +static const OnigCodePoint CR_Mongolian[] = { + 4, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9 +}; /* CR_Mongolian */ + +/* 'Myanmar': Script */ +static const OnigCodePoint CR_Myanmar[] = { + 6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059 +}; /* CR_Myanmar */ + +/* 'New_Tai_Lue': Script */ +static const OnigCodePoint CR_New_Tai_Lue[] = { + 4, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x19df +}; /* CR_New_Tai_Lue */ + +/* 'Ogham': Script */ +static const OnigCodePoint CR_Ogham[] = { + 1, + 0x1680, 0x169c +}; /* CR_Ogham */ + +/* 'Old_Italic': Script */ +static const OnigCodePoint CR_Old_Italic[] = { + 2, + 0x10300, 0x1031e, + 0x10320, 0x10323 +}; /* CR_Old_Italic */ + +/* 'Old_Persian': Script */ +static const OnigCodePoint CR_Old_Persian[] = { + 2, + 0x103a0, 0x103c3, + 0x103c8, 0x103d5 +}; /* CR_Old_Persian */ + +/* 'Oriya': Script */ +static const OnigCodePoint CR_Oriya[] = { + 14, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71 +}; /* CR_Oriya */ + +/* 'Osmanya': Script */ +static const OnigCodePoint CR_Osmanya[] = { + 2, + 0x10480, 0x1049d, + 0x104a0, 0x104a9 +}; /* CR_Osmanya */ + +/* 'Runic': Script */ +static const OnigCodePoint CR_Runic[] = { + 2, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0 +}; /* CR_Runic */ + +/* 'Shavian': Script */ +static const OnigCodePoint CR_Shavian[] = { + 1, + 0x10450, 0x1047f +}; /* CR_Shavian */ + +/* 'Sinhala': Script */ +static const OnigCodePoint CR_Sinhala[] = { + 11, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4 +}; /* CR_Sinhala */ + +/* 'Syloti_Nagri': Script */ +static const OnigCodePoint CR_Syloti_Nagri[] = { + 1, + 0xa800, 0xa82b +}; /* CR_Syloti_Nagri */ + +/* 'Syriac': Script */ +static const OnigCodePoint CR_Syriac[] = { + 3, + 0x0700, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f +}; /* CR_Syriac */ + +/* 'Tagalog': Script */ +static const OnigCodePoint CR_Tagalog[] = { + 2, + 0x1700, 0x170c, + 0x170e, 0x1714 +}; /* CR_Tagalog */ + +/* 'Tagbanwa': Script */ +static const OnigCodePoint CR_Tagbanwa[] = { + 3, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773 +}; /* CR_Tagbanwa */ + +/* 'Tai_Le': Script */ +static const OnigCodePoint CR_Tai_Le[] = { + 2, + 0x1950, 0x196d, + 0x1970, 0x1974 +}; /* CR_Tai_Le */ + +/* 'Tamil': Script */ +static const OnigCodePoint CR_Tamil[] = { + 15, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa +}; /* CR_Tamil */ + +/* 'Telugu': Script */ +static const OnigCodePoint CR_Telugu[] = { + 12, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f +}; /* CR_Telugu */ + +/* 'Thaana': Script */ +static const OnigCodePoint CR_Thaana[] = { + 1, + 0x0780, 0x07b1 +}; /* CR_Thaana */ + +/* 'Thai': Script */ +static const OnigCodePoint CR_Thai[] = { + 2, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e5b +}; /* CR_Thai */ + +/* 'Tibetan': Script */ +static const OnigCodePoint CR_Tibetan[] = { + 7, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1 +}; /* CR_Tibetan */ + +/* 'Tifinagh': Script */ +static const OnigCodePoint CR_Tifinagh[] = { + 2, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f +}; /* CR_Tifinagh */ + +/* 'Ugaritic': Script */ +static const OnigCodePoint CR_Ugaritic[] = { + 2, + 0x10380, 0x1039d, + 0x1039f, 0x1039f +}; /* CR_Ugaritic */ + +/* 'Yi': Script */ +static const OnigCodePoint CR_Yi[] = { + 2, + 0xa000, 0xa48c, + 0xa490, 0xa4c6 +}; /* CR_Yi */ + + +#endif /* USE_UNICODE_PROPERTIES */ + + +typedef struct { + int n; + OnigCodePoint code[3]; +} CodePointList3; + +typedef struct { + OnigCodePoint from; + CodePointList3 to; +} CaseFold_11_Type; + +typedef struct { + OnigCodePoint from; + CodePointList3 to; +} CaseUnfold_11_Type; + +typedef struct { + int n; + OnigCodePoint code[2]; +} CodePointList2; + +typedef struct { + OnigCodePoint from[2]; + CodePointList2 to; +} CaseUnfold_12_Type; + +typedef struct { + OnigCodePoint from[3]; + CodePointList2 to; +} CaseUnfold_13_Type; + +static const CaseFold_11_Type CaseFold[] = { + { 0x0041, {1, {0x0061}}}, + { 0x0042, {1, {0x0062}}}, + { 0x0043, {1, {0x0063}}}, + { 0x0044, {1, {0x0064}}}, + { 0x0045, {1, {0x0065}}}, + { 0x0046, {1, {0x0066}}}, + { 0x0047, {1, {0x0067}}}, + { 0x0048, {1, {0x0068}}}, + { 0x004a, {1, {0x006a}}}, + { 0x004b, {1, {0x006b}}}, + { 0x004c, {1, {0x006c}}}, + { 0x004d, {1, {0x006d}}}, + { 0x004e, {1, {0x006e}}}, + { 0x004f, {1, {0x006f}}}, + { 0x0050, {1, {0x0070}}}, + { 0x0051, {1, {0x0071}}}, + { 0x0052, {1, {0x0072}}}, + { 0x0053, {1, {0x0073}}}, + { 0x0054, {1, {0x0074}}}, + { 0x0055, {1, {0x0075}}}, + { 0x0056, {1, {0x0076}}}, + { 0x0057, {1, {0x0077}}}, + { 0x0058, {1, {0x0078}}}, + { 0x0059, {1, {0x0079}}}, + { 0x005a, {1, {0x007a}}}, + { 0x00b5, {1, {0x03bc}}}, + { 0x00c0, {1, {0x00e0}}}, + { 0x00c1, {1, {0x00e1}}}, + { 0x00c2, {1, {0x00e2}}}, + { 0x00c3, {1, {0x00e3}}}, + { 0x00c4, {1, {0x00e4}}}, + { 0x00c5, {1, {0x00e5}}}, + { 0x00c6, {1, {0x00e6}}}, + { 0x00c7, {1, {0x00e7}}}, + { 0x00c8, {1, {0x00e8}}}, + { 0x00c9, {1, {0x00e9}}}, + { 0x00ca, {1, {0x00ea}}}, + { 0x00cb, {1, {0x00eb}}}, + { 0x00cc, {1, {0x00ec}}}, + { 0x00cd, {1, {0x00ed}}}, + { 0x00ce, {1, {0x00ee}}}, + { 0x00cf, {1, {0x00ef}}}, + { 0x00d0, {1, {0x00f0}}}, + { 0x00d1, {1, {0x00f1}}}, + { 0x00d2, {1, {0x00f2}}}, + { 0x00d3, {1, {0x00f3}}}, + { 0x00d4, {1, {0x00f4}}}, + { 0x00d5, {1, {0x00f5}}}, + { 0x00d6, {1, {0x00f6}}}, + { 0x00d8, {1, {0x00f8}}}, + { 0x00d9, {1, {0x00f9}}}, + { 0x00da, {1, {0x00fa}}}, + { 0x00db, {1, {0x00fb}}}, + { 0x00dc, {1, {0x00fc}}}, + { 0x00dd, {1, {0x00fd}}}, + { 0x00de, {1, {0x00fe}}}, + { 0x00df, {2, {0x0073, 0x0073}}}, + { 0x0100, {1, {0x0101}}}, + { 0x0102, {1, {0x0103}}}, + { 0x0104, {1, {0x0105}}}, + { 0x0106, {1, {0x0107}}}, + { 0x0108, {1, {0x0109}}}, + { 0x010a, {1, {0x010b}}}, + { 0x010c, {1, {0x010d}}}, + { 0x010e, {1, {0x010f}}}, + { 0x0110, {1, {0x0111}}}, + { 0x0112, {1, {0x0113}}}, + { 0x0114, {1, {0x0115}}}, + { 0x0116, {1, {0x0117}}}, + { 0x0118, {1, {0x0119}}}, + { 0x011a, {1, {0x011b}}}, + { 0x011c, {1, {0x011d}}}, + { 0x011e, {1, {0x011f}}}, + { 0x0120, {1, {0x0121}}}, + { 0x0122, {1, {0x0123}}}, + { 0x0124, {1, {0x0125}}}, + { 0x0126, {1, {0x0127}}}, + { 0x0128, {1, {0x0129}}}, + { 0x012a, {1, {0x012b}}}, + { 0x012c, {1, {0x012d}}}, + { 0x012e, {1, {0x012f}}}, + { 0x0132, {1, {0x0133}}}, + { 0x0134, {1, {0x0135}}}, + { 0x0136, {1, {0x0137}}}, + { 0x0139, {1, {0x013a}}}, + { 0x013b, {1, {0x013c}}}, + { 0x013d, {1, {0x013e}}}, + { 0x013f, {1, {0x0140}}}, + { 0x0141, {1, {0x0142}}}, + { 0x0143, {1, {0x0144}}}, + { 0x0145, {1, {0x0146}}}, + { 0x0147, {1, {0x0148}}}, + { 0x0149, {2, {0x02bc, 0x006e}}}, + { 0x014a, {1, {0x014b}}}, + { 0x014c, {1, {0x014d}}}, + { 0x014e, {1, {0x014f}}}, + { 0x0150, {1, {0x0151}}}, + { 0x0152, {1, {0x0153}}}, + { 0x0154, {1, {0x0155}}}, + { 0x0156, {1, {0x0157}}}, + { 0x0158, {1, {0x0159}}}, + { 0x015a, {1, {0x015b}}}, + { 0x015c, {1, {0x015d}}}, + { 0x015e, {1, {0x015f}}}, + { 0x0160, {1, {0x0161}}}, + { 0x0162, {1, {0x0163}}}, + { 0x0164, {1, {0x0165}}}, + { 0x0166, {1, {0x0167}}}, + { 0x0168, {1, {0x0169}}}, + { 0x016a, {1, {0x016b}}}, + { 0x016c, {1, {0x016d}}}, + { 0x016e, {1, {0x016f}}}, + { 0x0170, {1, {0x0171}}}, + { 0x0172, {1, {0x0173}}}, + { 0x0174, {1, {0x0175}}}, + { 0x0176, {1, {0x0177}}}, + { 0x0178, {1, {0x00ff}}}, + { 0x0179, {1, {0x017a}}}, + { 0x017b, {1, {0x017c}}}, + { 0x017d, {1, {0x017e}}}, + { 0x017f, {1, {0x0073}}}, + { 0x0181, {1, {0x0253}}}, + { 0x0182, {1, {0x0183}}}, + { 0x0184, {1, {0x0185}}}, + { 0x0186, {1, {0x0254}}}, + { 0x0187, {1, {0x0188}}}, + { 0x0189, {1, {0x0256}}}, + { 0x018a, {1, {0x0257}}}, + { 0x018b, {1, {0x018c}}}, + { 0x018e, {1, {0x01dd}}}, + { 0x018f, {1, {0x0259}}}, + { 0x0190, {1, {0x025b}}}, + { 0x0191, {1, {0x0192}}}, + { 0x0193, {1, {0x0260}}}, + { 0x0194, {1, {0x0263}}}, + { 0x0196, {1, {0x0269}}}, + { 0x0197, {1, {0x0268}}}, + { 0x0198, {1, {0x0199}}}, + { 0x019c, {1, {0x026f}}}, + { 0x019d, {1, {0x0272}}}, + { 0x019f, {1, {0x0275}}}, + { 0x01a0, {1, {0x01a1}}}, + { 0x01a2, {1, {0x01a3}}}, + { 0x01a4, {1, {0x01a5}}}, + { 0x01a6, {1, {0x0280}}}, + { 0x01a7, {1, {0x01a8}}}, + { 0x01a9, {1, {0x0283}}}, + { 0x01ac, {1, {0x01ad}}}, + { 0x01ae, {1, {0x0288}}}, + { 0x01af, {1, {0x01b0}}}, + { 0x01b1, {1, {0x028a}}}, + { 0x01b2, {1, {0x028b}}}, + { 0x01b3, {1, {0x01b4}}}, + { 0x01b5, {1, {0x01b6}}}, + { 0x01b7, {1, {0x0292}}}, + { 0x01b8, {1, {0x01b9}}}, + { 0x01bc, {1, {0x01bd}}}, + { 0x01c4, {1, {0x01c6}}}, + { 0x01c5, {1, {0x01c6}}}, + { 0x01c7, {1, {0x01c9}}}, + { 0x01c8, {1, {0x01c9}}}, + { 0x01ca, {1, {0x01cc}}}, + { 0x01cb, {1, {0x01cc}}}, + { 0x01cd, {1, {0x01ce}}}, + { 0x01cf, {1, {0x01d0}}}, + { 0x01d1, {1, {0x01d2}}}, + { 0x01d3, {1, {0x01d4}}}, + { 0x01d5, {1, {0x01d6}}}, + { 0x01d7, {1, {0x01d8}}}, + { 0x01d9, {1, {0x01da}}}, + { 0x01db, {1, {0x01dc}}}, + { 0x01de, {1, {0x01df}}}, + { 0x01e0, {1, {0x01e1}}}, + { 0x01e2, {1, {0x01e3}}}, + { 0x01e4, {1, {0x01e5}}}, + { 0x01e6, {1, {0x01e7}}}, + { 0x01e8, {1, {0x01e9}}}, + { 0x01ea, {1, {0x01eb}}}, + { 0x01ec, {1, {0x01ed}}}, + { 0x01ee, {1, {0x01ef}}}, + { 0x01f0, {2, {0x006a, 0x030c}}}, + { 0x01f1, {1, {0x01f3}}}, + { 0x01f2, {1, {0x01f3}}}, + { 0x01f4, {1, {0x01f5}}}, + { 0x01f6, {1, {0x0195}}}, + { 0x01f7, {1, {0x01bf}}}, + { 0x01f8, {1, {0x01f9}}}, + { 0x01fa, {1, {0x01fb}}}, + { 0x01fc, {1, {0x01fd}}}, + { 0x01fe, {1, {0x01ff}}}, + { 0x0200, {1, {0x0201}}}, + { 0x0202, {1, {0x0203}}}, + { 0x0204, {1, {0x0205}}}, + { 0x0206, {1, {0x0207}}}, + { 0x0208, {1, {0x0209}}}, + { 0x020a, {1, {0x020b}}}, + { 0x020c, {1, {0x020d}}}, + { 0x020e, {1, {0x020f}}}, + { 0x0210, {1, {0x0211}}}, + { 0x0212, {1, {0x0213}}}, + { 0x0214, {1, {0x0215}}}, + { 0x0216, {1, {0x0217}}}, + { 0x0218, {1, {0x0219}}}, + { 0x021a, {1, {0x021b}}}, + { 0x021c, {1, {0x021d}}}, + { 0x021e, {1, {0x021f}}}, + { 0x0220, {1, {0x019e}}}, + { 0x0222, {1, {0x0223}}}, + { 0x0224, {1, {0x0225}}}, + { 0x0226, {1, {0x0227}}}, + { 0x0228, {1, {0x0229}}}, + { 0x022a, {1, {0x022b}}}, + { 0x022c, {1, {0x022d}}}, + { 0x022e, {1, {0x022f}}}, + { 0x0230, {1, {0x0231}}}, + { 0x0232, {1, {0x0233}}}, + { 0x023b, {1, {0x023c}}}, + { 0x023d, {1, {0x019a}}}, + { 0x0241, {1, {0x0294}}}, + { 0x0345, {1, {0x03b9}}}, + { 0x0386, {1, {0x03ac}}}, + { 0x0388, {1, {0x03ad}}}, + { 0x0389, {1, {0x03ae}}}, + { 0x038a, {1, {0x03af}}}, + { 0x038c, {1, {0x03cc}}}, + { 0x038e, {1, {0x03cd}}}, + { 0x038f, {1, {0x03ce}}}, + { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x0391, {1, {0x03b1}}}, + { 0x0392, {1, {0x03b2}}}, + { 0x0393, {1, {0x03b3}}}, + { 0x0394, {1, {0x03b4}}}, + { 0x0395, {1, {0x03b5}}}, + { 0x0396, {1, {0x03b6}}}, + { 0x0397, {1, {0x03b7}}}, + { 0x0398, {1, {0x03b8}}}, + { 0x0399, {1, {0x03b9}}}, + { 0x039a, {1, {0x03ba}}}, + { 0x039b, {1, {0x03bb}}}, + { 0x039c, {1, {0x03bc}}}, + { 0x039d, {1, {0x03bd}}}, + { 0x039e, {1, {0x03be}}}, + { 0x039f, {1, {0x03bf}}}, + { 0x03a0, {1, {0x03c0}}}, + { 0x03a1, {1, {0x03c1}}}, + { 0x03a3, {1, {0x03c3}}}, + { 0x03a4, {1, {0x03c4}}}, + { 0x03a5, {1, {0x03c5}}}, + { 0x03a6, {1, {0x03c6}}}, + { 0x03a7, {1, {0x03c7}}}, + { 0x03a8, {1, {0x03c8}}}, + { 0x03a9, {1, {0x03c9}}}, + { 0x03aa, {1, {0x03ca}}}, + { 0x03ab, {1, {0x03cb}}}, + { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x03c2, {1, {0x03c3}}}, + { 0x03d0, {1, {0x03b2}}}, + { 0x03d1, {1, {0x03b8}}}, + { 0x03d5, {1, {0x03c6}}}, + { 0x03d6, {1, {0x03c0}}}, + { 0x03d8, {1, {0x03d9}}}, + { 0x03da, {1, {0x03db}}}, + { 0x03dc, {1, {0x03dd}}}, + { 0x03de, {1, {0x03df}}}, + { 0x03e0, {1, {0x03e1}}}, + { 0x03e2, {1, {0x03e3}}}, + { 0x03e4, {1, {0x03e5}}}, + { 0x03e6, {1, {0x03e7}}}, + { 0x03e8, {1, {0x03e9}}}, + { 0x03ea, {1, {0x03eb}}}, + { 0x03ec, {1, {0x03ed}}}, + { 0x03ee, {1, {0x03ef}}}, + { 0x03f0, {1, {0x03ba}}}, + { 0x03f1, {1, {0x03c1}}}, + { 0x03f4, {1, {0x03b8}}}, + { 0x03f5, {1, {0x03b5}}}, + { 0x03f7, {1, {0x03f8}}}, + { 0x03f9, {1, {0x03f2}}}, + { 0x03fa, {1, {0x03fb}}}, + { 0x0400, {1, {0x0450}}}, + { 0x0401, {1, {0x0451}}}, + { 0x0402, {1, {0x0452}}}, + { 0x0403, {1, {0x0453}}}, + { 0x0404, {1, {0x0454}}}, + { 0x0405, {1, {0x0455}}}, + { 0x0406, {1, {0x0456}}}, + { 0x0407, {1, {0x0457}}}, + { 0x0408, {1, {0x0458}}}, + { 0x0409, {1, {0x0459}}}, + { 0x040a, {1, {0x045a}}}, + { 0x040b, {1, {0x045b}}}, + { 0x040c, {1, {0x045c}}}, + { 0x040d, {1, {0x045d}}}, + { 0x040e, {1, {0x045e}}}, + { 0x040f, {1, {0x045f}}}, + { 0x0410, {1, {0x0430}}}, + { 0x0411, {1, {0x0431}}}, + { 0x0412, {1, {0x0432}}}, + { 0x0413, {1, {0x0433}}}, + { 0x0414, {1, {0x0434}}}, + { 0x0415, {1, {0x0435}}}, + { 0x0416, {1, {0x0436}}}, + { 0x0417, {1, {0x0437}}}, + { 0x0418, {1, {0x0438}}}, + { 0x0419, {1, {0x0439}}}, + { 0x041a, {1, {0x043a}}}, + { 0x041b, {1, {0x043b}}}, + { 0x041c, {1, {0x043c}}}, + { 0x041d, {1, {0x043d}}}, + { 0x041e, {1, {0x043e}}}, + { 0x041f, {1, {0x043f}}}, + { 0x0420, {1, {0x0440}}}, + { 0x0421, {1, {0x0441}}}, + { 0x0422, {1, {0x0442}}}, + { 0x0423, {1, {0x0443}}}, + { 0x0424, {1, {0x0444}}}, + { 0x0425, {1, {0x0445}}}, + { 0x0426, {1, {0x0446}}}, + { 0x0427, {1, {0x0447}}}, + { 0x0428, {1, {0x0448}}}, + { 0x0429, {1, {0x0449}}}, + { 0x042a, {1, {0x044a}}}, + { 0x042b, {1, {0x044b}}}, + { 0x042c, {1, {0x044c}}}, + { 0x042d, {1, {0x044d}}}, + { 0x042e, {1, {0x044e}}}, + { 0x042f, {1, {0x044f}}}, + { 0x0460, {1, {0x0461}}}, + { 0x0462, {1, {0x0463}}}, + { 0x0464, {1, {0x0465}}}, + { 0x0466, {1, {0x0467}}}, + { 0x0468, {1, {0x0469}}}, + { 0x046a, {1, {0x046b}}}, + { 0x046c, {1, {0x046d}}}, + { 0x046e, {1, {0x046f}}}, + { 0x0470, {1, {0x0471}}}, + { 0x0472, {1, {0x0473}}}, + { 0x0474, {1, {0x0475}}}, + { 0x0476, {1, {0x0477}}}, + { 0x0478, {1, {0x0479}}}, + { 0x047a, {1, {0x047b}}}, + { 0x047c, {1, {0x047d}}}, + { 0x047e, {1, {0x047f}}}, + { 0x0480, {1, {0x0481}}}, + { 0x048a, {1, {0x048b}}}, + { 0x048c, {1, {0x048d}}}, + { 0x048e, {1, {0x048f}}}, + { 0x0490, {1, {0x0491}}}, + { 0x0492, {1, {0x0493}}}, + { 0x0494, {1, {0x0495}}}, + { 0x0496, {1, {0x0497}}}, + { 0x0498, {1, {0x0499}}}, + { 0x049a, {1, {0x049b}}}, + { 0x049c, {1, {0x049d}}}, + { 0x049e, {1, {0x049f}}}, + { 0x04a0, {1, {0x04a1}}}, + { 0x04a2, {1, {0x04a3}}}, + { 0x04a4, {1, {0x04a5}}}, + { 0x04a6, {1, {0x04a7}}}, + { 0x04a8, {1, {0x04a9}}}, + { 0x04aa, {1, {0x04ab}}}, + { 0x04ac, {1, {0x04ad}}}, + { 0x04ae, {1, {0x04af}}}, + { 0x04b0, {1, {0x04b1}}}, + { 0x04b2, {1, {0x04b3}}}, + { 0x04b4, {1, {0x04b5}}}, + { 0x04b6, {1, {0x04b7}}}, + { 0x04b8, {1, {0x04b9}}}, + { 0x04ba, {1, {0x04bb}}}, + { 0x04bc, {1, {0x04bd}}}, + { 0x04be, {1, {0x04bf}}}, + { 0x04c1, {1, {0x04c2}}}, + { 0x04c3, {1, {0x04c4}}}, + { 0x04c5, {1, {0x04c6}}}, + { 0x04c7, {1, {0x04c8}}}, + { 0x04c9, {1, {0x04ca}}}, + { 0x04cb, {1, {0x04cc}}}, + { 0x04cd, {1, {0x04ce}}}, + { 0x04d0, {1, {0x04d1}}}, + { 0x04d2, {1, {0x04d3}}}, + { 0x04d4, {1, {0x04d5}}}, + { 0x04d6, {1, {0x04d7}}}, + { 0x04d8, {1, {0x04d9}}}, + { 0x04da, {1, {0x04db}}}, + { 0x04dc, {1, {0x04dd}}}, + { 0x04de, {1, {0x04df}}}, + { 0x04e0, {1, {0x04e1}}}, + { 0x04e2, {1, {0x04e3}}}, + { 0x04e4, {1, {0x04e5}}}, + { 0x04e6, {1, {0x04e7}}}, + { 0x04e8, {1, {0x04e9}}}, + { 0x04ea, {1, {0x04eb}}}, + { 0x04ec, {1, {0x04ed}}}, + { 0x04ee, {1, {0x04ef}}}, + { 0x04f0, {1, {0x04f1}}}, + { 0x04f2, {1, {0x04f3}}}, + { 0x04f4, {1, {0x04f5}}}, + { 0x04f6, {1, {0x04f7}}}, + { 0x04f8, {1, {0x04f9}}}, + { 0x0500, {1, {0x0501}}}, + { 0x0502, {1, {0x0503}}}, + { 0x0504, {1, {0x0505}}}, + { 0x0506, {1, {0x0507}}}, + { 0x0508, {1, {0x0509}}}, + { 0x050a, {1, {0x050b}}}, + { 0x050c, {1, {0x050d}}}, + { 0x050e, {1, {0x050f}}}, + { 0x0531, {1, {0x0561}}}, + { 0x0532, {1, {0x0562}}}, + { 0x0533, {1, {0x0563}}}, + { 0x0534, {1, {0x0564}}}, + { 0x0535, {1, {0x0565}}}, + { 0x0536, {1, {0x0566}}}, + { 0x0537, {1, {0x0567}}}, + { 0x0538, {1, {0x0568}}}, + { 0x0539, {1, {0x0569}}}, + { 0x053a, {1, {0x056a}}}, + { 0x053b, {1, {0x056b}}}, + { 0x053c, {1, {0x056c}}}, + { 0x053d, {1, {0x056d}}}, + { 0x053e, {1, {0x056e}}}, + { 0x053f, {1, {0x056f}}}, + { 0x0540, {1, {0x0570}}}, + { 0x0541, {1, {0x0571}}}, + { 0x0542, {1, {0x0572}}}, + { 0x0543, {1, {0x0573}}}, + { 0x0544, {1, {0x0574}}}, + { 0x0545, {1, {0x0575}}}, + { 0x0546, {1, {0x0576}}}, + { 0x0547, {1, {0x0577}}}, + { 0x0548, {1, {0x0578}}}, + { 0x0549, {1, {0x0579}}}, + { 0x054a, {1, {0x057a}}}, + { 0x054b, {1, {0x057b}}}, + { 0x054c, {1, {0x057c}}}, + { 0x054d, {1, {0x057d}}}, + { 0x054e, {1, {0x057e}}}, + { 0x054f, {1, {0x057f}}}, + { 0x0550, {1, {0x0580}}}, + { 0x0551, {1, {0x0581}}}, + { 0x0552, {1, {0x0582}}}, + { 0x0553, {1, {0x0583}}}, + { 0x0554, {1, {0x0584}}}, + { 0x0555, {1, {0x0585}}}, + { 0x0556, {1, {0x0586}}}, + { 0x0587, {2, {0x0565, 0x0582}}}, + { 0x10a0, {1, {0x2d00}}}, + { 0x10a1, {1, {0x2d01}}}, + { 0x10a2, {1, {0x2d02}}}, + { 0x10a3, {1, {0x2d03}}}, + { 0x10a4, {1, {0x2d04}}}, + { 0x10a5, {1, {0x2d05}}}, + { 0x10a6, {1, {0x2d06}}}, + { 0x10a7, {1, {0x2d07}}}, + { 0x10a8, {1, {0x2d08}}}, + { 0x10a9, {1, {0x2d09}}}, + { 0x10aa, {1, {0x2d0a}}}, + { 0x10ab, {1, {0x2d0b}}}, + { 0x10ac, {1, {0x2d0c}}}, + { 0x10ad, {1, {0x2d0d}}}, + { 0x10ae, {1, {0x2d0e}}}, + { 0x10af, {1, {0x2d0f}}}, + { 0x10b0, {1, {0x2d10}}}, + { 0x10b1, {1, {0x2d11}}}, + { 0x10b2, {1, {0x2d12}}}, + { 0x10b3, {1, {0x2d13}}}, + { 0x10b4, {1, {0x2d14}}}, + { 0x10b5, {1, {0x2d15}}}, + { 0x10b6, {1, {0x2d16}}}, + { 0x10b7, {1, {0x2d17}}}, + { 0x10b8, {1, {0x2d18}}}, + { 0x10b9, {1, {0x2d19}}}, + { 0x10ba, {1, {0x2d1a}}}, + { 0x10bb, {1, {0x2d1b}}}, + { 0x10bc, {1, {0x2d1c}}}, + { 0x10bd, {1, {0x2d1d}}}, + { 0x10be, {1, {0x2d1e}}}, + { 0x10bf, {1, {0x2d1f}}}, + { 0x10c0, {1, {0x2d20}}}, + { 0x10c1, {1, {0x2d21}}}, + { 0x10c2, {1, {0x2d22}}}, + { 0x10c3, {1, {0x2d23}}}, + { 0x10c4, {1, {0x2d24}}}, + { 0x10c5, {1, {0x2d25}}}, + { 0x1e00, {1, {0x1e01}}}, + { 0x1e02, {1, {0x1e03}}}, + { 0x1e04, {1, {0x1e05}}}, + { 0x1e06, {1, {0x1e07}}}, + { 0x1e08, {1, {0x1e09}}}, + { 0x1e0a, {1, {0x1e0b}}}, + { 0x1e0c, {1, {0x1e0d}}}, + { 0x1e0e, {1, {0x1e0f}}}, + { 0x1e10, {1, {0x1e11}}}, + { 0x1e12, {1, {0x1e13}}}, + { 0x1e14, {1, {0x1e15}}}, + { 0x1e16, {1, {0x1e17}}}, + { 0x1e18, {1, {0x1e19}}}, + { 0x1e1a, {1, {0x1e1b}}}, + { 0x1e1c, {1, {0x1e1d}}}, + { 0x1e1e, {1, {0x1e1f}}}, + { 0x1e20, {1, {0x1e21}}}, + { 0x1e22, {1, {0x1e23}}}, + { 0x1e24, {1, {0x1e25}}}, + { 0x1e26, {1, {0x1e27}}}, + { 0x1e28, {1, {0x1e29}}}, + { 0x1e2a, {1, {0x1e2b}}}, + { 0x1e2c, {1, {0x1e2d}}}, + { 0x1e2e, {1, {0x1e2f}}}, + { 0x1e30, {1, {0x1e31}}}, + { 0x1e32, {1, {0x1e33}}}, + { 0x1e34, {1, {0x1e35}}}, + { 0x1e36, {1, {0x1e37}}}, + { 0x1e38, {1, {0x1e39}}}, + { 0x1e3a, {1, {0x1e3b}}}, + { 0x1e3c, {1, {0x1e3d}}}, + { 0x1e3e, {1, {0x1e3f}}}, + { 0x1e40, {1, {0x1e41}}}, + { 0x1e42, {1, {0x1e43}}}, + { 0x1e44, {1, {0x1e45}}}, + { 0x1e46, {1, {0x1e47}}}, + { 0x1e48, {1, {0x1e49}}}, + { 0x1e4a, {1, {0x1e4b}}}, + { 0x1e4c, {1, {0x1e4d}}}, + { 0x1e4e, {1, {0x1e4f}}}, + { 0x1e50, {1, {0x1e51}}}, + { 0x1e52, {1, {0x1e53}}}, + { 0x1e54, {1, {0x1e55}}}, + { 0x1e56, {1, {0x1e57}}}, + { 0x1e58, {1, {0x1e59}}}, + { 0x1e5a, {1, {0x1e5b}}}, + { 0x1e5c, {1, {0x1e5d}}}, + { 0x1e5e, {1, {0x1e5f}}}, + { 0x1e60, {1, {0x1e61}}}, + { 0x1e62, {1, {0x1e63}}}, + { 0x1e64, {1, {0x1e65}}}, + { 0x1e66, {1, {0x1e67}}}, + { 0x1e68, {1, {0x1e69}}}, + { 0x1e6a, {1, {0x1e6b}}}, + { 0x1e6c, {1, {0x1e6d}}}, + { 0x1e6e, {1, {0x1e6f}}}, + { 0x1e70, {1, {0x1e71}}}, + { 0x1e72, {1, {0x1e73}}}, + { 0x1e74, {1, {0x1e75}}}, + { 0x1e76, {1, {0x1e77}}}, + { 0x1e78, {1, {0x1e79}}}, + { 0x1e7a, {1, {0x1e7b}}}, + { 0x1e7c, {1, {0x1e7d}}}, + { 0x1e7e, {1, {0x1e7f}}}, + { 0x1e80, {1, {0x1e81}}}, + { 0x1e82, {1, {0x1e83}}}, + { 0x1e84, {1, {0x1e85}}}, + { 0x1e86, {1, {0x1e87}}}, + { 0x1e88, {1, {0x1e89}}}, + { 0x1e8a, {1, {0x1e8b}}}, + { 0x1e8c, {1, {0x1e8d}}}, + { 0x1e8e, {1, {0x1e8f}}}, + { 0x1e90, {1, {0x1e91}}}, + { 0x1e92, {1, {0x1e93}}}, + { 0x1e94, {1, {0x1e95}}}, + { 0x1e96, {2, {0x0068, 0x0331}}}, + { 0x1e97, {2, {0x0074, 0x0308}}}, + { 0x1e98, {2, {0x0077, 0x030a}}}, + { 0x1e99, {2, {0x0079, 0x030a}}}, + { 0x1e9a, {2, {0x0061, 0x02be}}}, + { 0x1e9b, {1, {0x1e61}}}, + { 0x1ea0, {1, {0x1ea1}}}, + { 0x1ea2, {1, {0x1ea3}}}, + { 0x1ea4, {1, {0x1ea5}}}, + { 0x1ea6, {1, {0x1ea7}}}, + { 0x1ea8, {1, {0x1ea9}}}, + { 0x1eaa, {1, {0x1eab}}}, + { 0x1eac, {1, {0x1ead}}}, + { 0x1eae, {1, {0x1eaf}}}, + { 0x1eb0, {1, {0x1eb1}}}, + { 0x1eb2, {1, {0x1eb3}}}, + { 0x1eb4, {1, {0x1eb5}}}, + { 0x1eb6, {1, {0x1eb7}}}, + { 0x1eb8, {1, {0x1eb9}}}, + { 0x1eba, {1, {0x1ebb}}}, + { 0x1ebc, {1, {0x1ebd}}}, + { 0x1ebe, {1, {0x1ebf}}}, + { 0x1ec0, {1, {0x1ec1}}}, + { 0x1ec2, {1, {0x1ec3}}}, + { 0x1ec4, {1, {0x1ec5}}}, + { 0x1ec6, {1, {0x1ec7}}}, + { 0x1ec8, {1, {0x1ec9}}}, + { 0x1eca, {1, {0x1ecb}}}, + { 0x1ecc, {1, {0x1ecd}}}, + { 0x1ece, {1, {0x1ecf}}}, + { 0x1ed0, {1, {0x1ed1}}}, + { 0x1ed2, {1, {0x1ed3}}}, + { 0x1ed4, {1, {0x1ed5}}}, + { 0x1ed6, {1, {0x1ed7}}}, + { 0x1ed8, {1, {0x1ed9}}}, + { 0x1eda, {1, {0x1edb}}}, + { 0x1edc, {1, {0x1edd}}}, + { 0x1ede, {1, {0x1edf}}}, + { 0x1ee0, {1, {0x1ee1}}}, + { 0x1ee2, {1, {0x1ee3}}}, + { 0x1ee4, {1, {0x1ee5}}}, + { 0x1ee6, {1, {0x1ee7}}}, + { 0x1ee8, {1, {0x1ee9}}}, + { 0x1eea, {1, {0x1eeb}}}, + { 0x1eec, {1, {0x1eed}}}, + { 0x1eee, {1, {0x1eef}}}, + { 0x1ef0, {1, {0x1ef1}}}, + { 0x1ef2, {1, {0x1ef3}}}, + { 0x1ef4, {1, {0x1ef5}}}, + { 0x1ef6, {1, {0x1ef7}}}, + { 0x1ef8, {1, {0x1ef9}}}, + { 0x1f08, {1, {0x1f00}}}, + { 0x1f09, {1, {0x1f01}}}, + { 0x1f0a, {1, {0x1f02}}}, + { 0x1f0b, {1, {0x1f03}}}, + { 0x1f0c, {1, {0x1f04}}}, + { 0x1f0d, {1, {0x1f05}}}, + { 0x1f0e, {1, {0x1f06}}}, + { 0x1f0f, {1, {0x1f07}}}, + { 0x1f18, {1, {0x1f10}}}, + { 0x1f19, {1, {0x1f11}}}, + { 0x1f1a, {1, {0x1f12}}}, + { 0x1f1b, {1, {0x1f13}}}, + { 0x1f1c, {1, {0x1f14}}}, + { 0x1f1d, {1, {0x1f15}}}, + { 0x1f28, {1, {0x1f20}}}, + { 0x1f29, {1, {0x1f21}}}, + { 0x1f2a, {1, {0x1f22}}}, + { 0x1f2b, {1, {0x1f23}}}, + { 0x1f2c, {1, {0x1f24}}}, + { 0x1f2d, {1, {0x1f25}}}, + { 0x1f2e, {1, {0x1f26}}}, + { 0x1f2f, {1, {0x1f27}}}, + { 0x1f38, {1, {0x1f30}}}, + { 0x1f39, {1, {0x1f31}}}, + { 0x1f3a, {1, {0x1f32}}}, + { 0x1f3b, {1, {0x1f33}}}, + { 0x1f3c, {1, {0x1f34}}}, + { 0x1f3d, {1, {0x1f35}}}, + { 0x1f3e, {1, {0x1f36}}}, + { 0x1f3f, {1, {0x1f37}}}, + { 0x1f48, {1, {0x1f40}}}, + { 0x1f49, {1, {0x1f41}}}, + { 0x1f4a, {1, {0x1f42}}}, + { 0x1f4b, {1, {0x1f43}}}, + { 0x1f4c, {1, {0x1f44}}}, + { 0x1f4d, {1, {0x1f45}}}, + { 0x1f50, {2, {0x03c5, 0x0313}}}, + { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}}, + { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}}, + { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}}, + { 0x1f59, {1, {0x1f51}}}, + { 0x1f5b, {1, {0x1f53}}}, + { 0x1f5d, {1, {0x1f55}}}, + { 0x1f5f, {1, {0x1f57}}}, + { 0x1f68, {1, {0x1f60}}}, + { 0x1f69, {1, {0x1f61}}}, + { 0x1f6a, {1, {0x1f62}}}, + { 0x1f6b, {1, {0x1f63}}}, + { 0x1f6c, {1, {0x1f64}}}, + { 0x1f6d, {1, {0x1f65}}}, + { 0x1f6e, {1, {0x1f66}}}, + { 0x1f6f, {1, {0x1f67}}}, + { 0x1f80, {2, {0x1f00, 0x03b9}}}, + { 0x1f81, {2, {0x1f01, 0x03b9}}}, + { 0x1f82, {2, {0x1f02, 0x03b9}}}, + { 0x1f83, {2, {0x1f03, 0x03b9}}}, + { 0x1f84, {2, {0x1f04, 0x03b9}}}, + { 0x1f85, {2, {0x1f05, 0x03b9}}}, + { 0x1f86, {2, {0x1f06, 0x03b9}}}, + { 0x1f87, {2, {0x1f07, 0x03b9}}}, + { 0x1f88, {2, {0x1f00, 0x03b9}}}, + { 0x1f89, {2, {0x1f01, 0x03b9}}}, + { 0x1f8a, {2, {0x1f02, 0x03b9}}}, + { 0x1f8b, {2, {0x1f03, 0x03b9}}}, + { 0x1f8c, {2, {0x1f04, 0x03b9}}}, + { 0x1f8d, {2, {0x1f05, 0x03b9}}}, + { 0x1f8e, {2, {0x1f06, 0x03b9}}}, + { 0x1f8f, {2, {0x1f07, 0x03b9}}}, + { 0x1f90, {2, {0x1f20, 0x03b9}}}, + { 0x1f91, {2, {0x1f21, 0x03b9}}}, + { 0x1f92, {2, {0x1f22, 0x03b9}}}, + { 0x1f93, {2, {0x1f23, 0x03b9}}}, + { 0x1f94, {2, {0x1f24, 0x03b9}}}, + { 0x1f95, {2, {0x1f25, 0x03b9}}}, + { 0x1f96, {2, {0x1f26, 0x03b9}}}, + { 0x1f97, {2, {0x1f27, 0x03b9}}}, + { 0x1f98, {2, {0x1f20, 0x03b9}}}, + { 0x1f99, {2, {0x1f21, 0x03b9}}}, + { 0x1f9a, {2, {0x1f22, 0x03b9}}}, + { 0x1f9b, {2, {0x1f23, 0x03b9}}}, + { 0x1f9c, {2, {0x1f24, 0x03b9}}}, + { 0x1f9d, {2, {0x1f25, 0x03b9}}}, + { 0x1f9e, {2, {0x1f26, 0x03b9}}}, + { 0x1f9f, {2, {0x1f27, 0x03b9}}}, + { 0x1fa0, {2, {0x1f60, 0x03b9}}}, + { 0x1fa1, {2, {0x1f61, 0x03b9}}}, + { 0x1fa2, {2, {0x1f62, 0x03b9}}}, + { 0x1fa3, {2, {0x1f63, 0x03b9}}}, + { 0x1fa4, {2, {0x1f64, 0x03b9}}}, + { 0x1fa5, {2, {0x1f65, 0x03b9}}}, + { 0x1fa6, {2, {0x1f66, 0x03b9}}}, + { 0x1fa7, {2, {0x1f67, 0x03b9}}}, + { 0x1fa8, {2, {0x1f60, 0x03b9}}}, + { 0x1fa9, {2, {0x1f61, 0x03b9}}}, + { 0x1faa, {2, {0x1f62, 0x03b9}}}, + { 0x1fab, {2, {0x1f63, 0x03b9}}}, + { 0x1fac, {2, {0x1f64, 0x03b9}}}, + { 0x1fad, {2, {0x1f65, 0x03b9}}}, + { 0x1fae, {2, {0x1f66, 0x03b9}}}, + { 0x1faf, {2, {0x1f67, 0x03b9}}}, + { 0x1fb2, {2, {0x1f70, 0x03b9}}}, + { 0x1fb3, {2, {0x03b1, 0x03b9}}}, + { 0x1fb4, {2, {0x03ac, 0x03b9}}}, + { 0x1fb6, {2, {0x03b1, 0x0342}}}, + { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}}, + { 0x1fb8, {1, {0x1fb0}}}, + { 0x1fb9, {1, {0x1fb1}}}, + { 0x1fba, {1, {0x1f70}}}, + { 0x1fbb, {1, {0x1f71}}}, + { 0x1fbc, {2, {0x03b1, 0x03b9}}}, + { 0x1fbe, {1, {0x03b9}}}, + { 0x1fc2, {2, {0x1f74, 0x03b9}}}, + { 0x1fc3, {2, {0x03b7, 0x03b9}}}, + { 0x1fc4, {2, {0x03ae, 0x03b9}}}, + { 0x1fc6, {2, {0x03b7, 0x0342}}}, + { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}}, + { 0x1fc8, {1, {0x1f72}}}, + { 0x1fc9, {1, {0x1f73}}}, + { 0x1fca, {1, {0x1f74}}}, + { 0x1fcb, {1, {0x1f75}}}, + { 0x1fcc, {2, {0x03b7, 0x03b9}}}, + { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}}, + { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x1fd6, {2, {0x03b9, 0x0342}}}, + { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}}, + { 0x1fd8, {1, {0x1fd0}}}, + { 0x1fd9, {1, {0x1fd1}}}, + { 0x1fda, {1, {0x1f76}}}, + { 0x1fdb, {1, {0x1f77}}}, + { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}}, + { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x1fe4, {2, {0x03c1, 0x0313}}}, + { 0x1fe6, {2, {0x03c5, 0x0342}}}, + { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}}, + { 0x1fe8, {1, {0x1fe0}}}, + { 0x1fe9, {1, {0x1fe1}}}, + { 0x1fea, {1, {0x1f7a}}}, + { 0x1feb, {1, {0x1f7b}}}, + { 0x1fec, {1, {0x1fe5}}}, + { 0x1ff2, {2, {0x1f7c, 0x03b9}}}, + { 0x1ff3, {2, {0x03c9, 0x03b9}}}, + { 0x1ff4, {2, {0x03ce, 0x03b9}}}, + { 0x1ff6, {2, {0x03c9, 0x0342}}}, + { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}}, + { 0x1ff8, {1, {0x1f78}}}, + { 0x1ff9, {1, {0x1f79}}}, + { 0x1ffa, {1, {0x1f7c}}}, + { 0x1ffb, {1, {0x1f7d}}}, + { 0x1ffc, {2, {0x03c9, 0x03b9}}}, + { 0x2126, {1, {0x03c9}}}, + { 0x212a, {1, {0x006b}}}, + { 0x212b, {1, {0x00e5}}}, + { 0x2160, {1, {0x2170}}}, + { 0x2161, {1, {0x2171}}}, + { 0x2162, {1, {0x2172}}}, + { 0x2163, {1, {0x2173}}}, + { 0x2164, {1, {0x2174}}}, + { 0x2165, {1, {0x2175}}}, + { 0x2166, {1, {0x2176}}}, + { 0x2167, {1, {0x2177}}}, + { 0x2168, {1, {0x2178}}}, + { 0x2169, {1, {0x2179}}}, + { 0x216a, {1, {0x217a}}}, + { 0x216b, {1, {0x217b}}}, + { 0x216c, {1, {0x217c}}}, + { 0x216d, {1, {0x217d}}}, + { 0x216e, {1, {0x217e}}}, + { 0x216f, {1, {0x217f}}}, + { 0x24b6, {1, {0x24d0}}}, + { 0x24b7, {1, {0x24d1}}}, + { 0x24b8, {1, {0x24d2}}}, + { 0x24b9, {1, {0x24d3}}}, + { 0x24ba, {1, {0x24d4}}}, + { 0x24bb, {1, {0x24d5}}}, + { 0x24bc, {1, {0x24d6}}}, + { 0x24bd, {1, {0x24d7}}}, + { 0x24be, {1, {0x24d8}}}, + { 0x24bf, {1, {0x24d9}}}, + { 0x24c0, {1, {0x24da}}}, + { 0x24c1, {1, {0x24db}}}, + { 0x24c2, {1, {0x24dc}}}, + { 0x24c3, {1, {0x24dd}}}, + { 0x24c4, {1, {0x24de}}}, + { 0x24c5, {1, {0x24df}}}, + { 0x24c6, {1, {0x24e0}}}, + { 0x24c7, {1, {0x24e1}}}, + { 0x24c8, {1, {0x24e2}}}, + { 0x24c9, {1, {0x24e3}}}, + { 0x24ca, {1, {0x24e4}}}, + { 0x24cb, {1, {0x24e5}}}, + { 0x24cc, {1, {0x24e6}}}, + { 0x24cd, {1, {0x24e7}}}, + { 0x24ce, {1, {0x24e8}}}, + { 0x24cf, {1, {0x24e9}}}, + { 0x2c00, {1, {0x2c30}}}, + { 0x2c01, {1, {0x2c31}}}, + { 0x2c02, {1, {0x2c32}}}, + { 0x2c03, {1, {0x2c33}}}, + { 0x2c04, {1, {0x2c34}}}, + { 0x2c05, {1, {0x2c35}}}, + { 0x2c06, {1, {0x2c36}}}, + { 0x2c07, {1, {0x2c37}}}, + { 0x2c08, {1, {0x2c38}}}, + { 0x2c09, {1, {0x2c39}}}, + { 0x2c0a, {1, {0x2c3a}}}, + { 0x2c0b, {1, {0x2c3b}}}, + { 0x2c0c, {1, {0x2c3c}}}, + { 0x2c0d, {1, {0x2c3d}}}, + { 0x2c0e, {1, {0x2c3e}}}, + { 0x2c0f, {1, {0x2c3f}}}, + { 0x2c10, {1, {0x2c40}}}, + { 0x2c11, {1, {0x2c41}}}, + { 0x2c12, {1, {0x2c42}}}, + { 0x2c13, {1, {0x2c43}}}, + { 0x2c14, {1, {0x2c44}}}, + { 0x2c15, {1, {0x2c45}}}, + { 0x2c16, {1, {0x2c46}}}, + { 0x2c17, {1, {0x2c47}}}, + { 0x2c18, {1, {0x2c48}}}, + { 0x2c19, {1, {0x2c49}}}, + { 0x2c1a, {1, {0x2c4a}}}, + { 0x2c1b, {1, {0x2c4b}}}, + { 0x2c1c, {1, {0x2c4c}}}, + { 0x2c1d, {1, {0x2c4d}}}, + { 0x2c1e, {1, {0x2c4e}}}, + { 0x2c1f, {1, {0x2c4f}}}, + { 0x2c20, {1, {0x2c50}}}, + { 0x2c21, {1, {0x2c51}}}, + { 0x2c22, {1, {0x2c52}}}, + { 0x2c23, {1, {0x2c53}}}, + { 0x2c24, {1, {0x2c54}}}, + { 0x2c25, {1, {0x2c55}}}, + { 0x2c26, {1, {0x2c56}}}, + { 0x2c27, {1, {0x2c57}}}, + { 0x2c28, {1, {0x2c58}}}, + { 0x2c29, {1, {0x2c59}}}, + { 0x2c2a, {1, {0x2c5a}}}, + { 0x2c2b, {1, {0x2c5b}}}, + { 0x2c2c, {1, {0x2c5c}}}, + { 0x2c2d, {1, {0x2c5d}}}, + { 0x2c2e, {1, {0x2c5e}}}, + { 0x2c80, {1, {0x2c81}}}, + { 0x2c82, {1, {0x2c83}}}, + { 0x2c84, {1, {0x2c85}}}, + { 0x2c86, {1, {0x2c87}}}, + { 0x2c88, {1, {0x2c89}}}, + { 0x2c8a, {1, {0x2c8b}}}, + { 0x2c8c, {1, {0x2c8d}}}, + { 0x2c8e, {1, {0x2c8f}}}, + { 0x2c90, {1, {0x2c91}}}, + { 0x2c92, {1, {0x2c93}}}, + { 0x2c94, {1, {0x2c95}}}, + { 0x2c96, {1, {0x2c97}}}, + { 0x2c98, {1, {0x2c99}}}, + { 0x2c9a, {1, {0x2c9b}}}, + { 0x2c9c, {1, {0x2c9d}}}, + { 0x2c9e, {1, {0x2c9f}}}, + { 0x2ca0, {1, {0x2ca1}}}, + { 0x2ca2, {1, {0x2ca3}}}, + { 0x2ca4, {1, {0x2ca5}}}, + { 0x2ca6, {1, {0x2ca7}}}, + { 0x2ca8, {1, {0x2ca9}}}, + { 0x2caa, {1, {0x2cab}}}, + { 0x2cac, {1, {0x2cad}}}, + { 0x2cae, {1, {0x2caf}}}, + { 0x2cb0, {1, {0x2cb1}}}, + { 0x2cb2, {1, {0x2cb3}}}, + { 0x2cb4, {1, {0x2cb5}}}, + { 0x2cb6, {1, {0x2cb7}}}, + { 0x2cb8, {1, {0x2cb9}}}, + { 0x2cba, {1, {0x2cbb}}}, + { 0x2cbc, {1, {0x2cbd}}}, + { 0x2cbe, {1, {0x2cbf}}}, + { 0x2cc0, {1, {0x2cc1}}}, + { 0x2cc2, {1, {0x2cc3}}}, + { 0x2cc4, {1, {0x2cc5}}}, + { 0x2cc6, {1, {0x2cc7}}}, + { 0x2cc8, {1, {0x2cc9}}}, + { 0x2cca, {1, {0x2ccb}}}, + { 0x2ccc, {1, {0x2ccd}}}, + { 0x2cce, {1, {0x2ccf}}}, + { 0x2cd0, {1, {0x2cd1}}}, + { 0x2cd2, {1, {0x2cd3}}}, + { 0x2cd4, {1, {0x2cd5}}}, + { 0x2cd6, {1, {0x2cd7}}}, + { 0x2cd8, {1, {0x2cd9}}}, + { 0x2cda, {1, {0x2cdb}}}, + { 0x2cdc, {1, {0x2cdd}}}, + { 0x2cde, {1, {0x2cdf}}}, + { 0x2ce0, {1, {0x2ce1}}}, + { 0x2ce2, {1, {0x2ce3}}}, + { 0xfb00, {2, {0x0066, 0x0066}}}, + { 0xfb01, {2, {0x0066, 0x0069}}}, + { 0xfb02, {2, {0x0066, 0x006c}}}, + { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}}, + { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}}, + { 0xfb05, {2, {0x0073, 0x0074}}}, + { 0xfb06, {2, {0x0073, 0x0074}}}, + { 0xfb13, {2, {0x0574, 0x0576}}}, + { 0xfb14, {2, {0x0574, 0x0565}}}, + { 0xfb15, {2, {0x0574, 0x056b}}}, + { 0xfb16, {2, {0x057e, 0x0576}}}, + { 0xfb17, {2, {0x0574, 0x056d}}}, + { 0xff21, {1, {0xff41}}}, + { 0xff22, {1, {0xff42}}}, + { 0xff23, {1, {0xff43}}}, + { 0xff24, {1, {0xff44}}}, + { 0xff25, {1, {0xff45}}}, + { 0xff26, {1, {0xff46}}}, + { 0xff27, {1, {0xff47}}}, + { 0xff28, {1, {0xff48}}}, + { 0xff29, {1, {0xff49}}}, + { 0xff2a, {1, {0xff4a}}}, + { 0xff2b, {1, {0xff4b}}}, + { 0xff2c, {1, {0xff4c}}}, + { 0xff2d, {1, {0xff4d}}}, + { 0xff2e, {1, {0xff4e}}}, + { 0xff2f, {1, {0xff4f}}}, + { 0xff30, {1, {0xff50}}}, + { 0xff31, {1, {0xff51}}}, + { 0xff32, {1, {0xff52}}}, + { 0xff33, {1, {0xff53}}}, + { 0xff34, {1, {0xff54}}}, + { 0xff35, {1, {0xff55}}}, + { 0xff36, {1, {0xff56}}}, + { 0xff37, {1, {0xff57}}}, + { 0xff38, {1, {0xff58}}}, + { 0xff39, {1, {0xff59}}}, + { 0xff3a, {1, {0xff5a}}}, + { 0x10400, {1, {0x10428}}}, + { 0x10401, {1, {0x10429}}}, + { 0x10402, {1, {0x1042a}}}, + { 0x10403, {1, {0x1042b}}}, + { 0x10404, {1, {0x1042c}}}, + { 0x10405, {1, {0x1042d}}}, + { 0x10406, {1, {0x1042e}}}, + { 0x10407, {1, {0x1042f}}}, + { 0x10408, {1, {0x10430}}}, + { 0x10409, {1, {0x10431}}}, + { 0x1040a, {1, {0x10432}}}, + { 0x1040b, {1, {0x10433}}}, + { 0x1040c, {1, {0x10434}}}, + { 0x1040d, {1, {0x10435}}}, + { 0x1040e, {1, {0x10436}}}, + { 0x1040f, {1, {0x10437}}}, + { 0x10410, {1, {0x10438}}}, + { 0x10411, {1, {0x10439}}}, + { 0x10412, {1, {0x1043a}}}, + { 0x10413, {1, {0x1043b}}}, + { 0x10414, {1, {0x1043c}}}, + { 0x10415, {1, {0x1043d}}}, + { 0x10416, {1, {0x1043e}}}, + { 0x10417, {1, {0x1043f}}}, + { 0x10418, {1, {0x10440}}}, + { 0x10419, {1, {0x10441}}}, + { 0x1041a, {1, {0x10442}}}, + { 0x1041b, {1, {0x10443}}}, + { 0x1041c, {1, {0x10444}}}, + { 0x1041d, {1, {0x10445}}}, + { 0x1041e, {1, {0x10446}}}, + { 0x1041f, {1, {0x10447}}}, + { 0x10420, {1, {0x10448}}}, + { 0x10421, {1, {0x10449}}}, + { 0x10422, {1, {0x1044a}}}, + { 0x10423, {1, {0x1044b}}}, + { 0x10424, {1, {0x1044c}}}, + { 0x10425, {1, {0x1044d}}}, + { 0x10426, {1, {0x1044e}}}, + { 0x10427, {1, {0x1044f}}} +}; + +static const CaseFold_11_Type CaseFold_Locale[] = { + { 0x0049, {1, {0x0069}}}, + { 0x0130, {2, {0x0069, 0x0307}}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11[] = { + { 0x0061, {1, {0x0041 }}}, + { 0x0062, {1, {0x0042 }}}, + { 0x0063, {1, {0x0043 }}}, + { 0x0064, {1, {0x0044 }}}, + { 0x0065, {1, {0x0045 }}}, + { 0x0066, {1, {0x0046 }}}, + { 0x0067, {1, {0x0047 }}}, + { 0x0068, {1, {0x0048 }}}, + { 0x006a, {1, {0x004a }}}, + { 0x006b, {2, {0x212a, 0x004b }}}, + { 0x006c, {1, {0x004c }}}, + { 0x006d, {1, {0x004d }}}, + { 0x006e, {1, {0x004e }}}, + { 0x006f, {1, {0x004f }}}, + { 0x0070, {1, {0x0050 }}}, + { 0x0071, {1, {0x0051 }}}, + { 0x0072, {1, {0x0052 }}}, + { 0x0073, {2, {0x0053, 0x017f }}}, + { 0x0074, {1, {0x0054 }}}, + { 0x0075, {1, {0x0055 }}}, + { 0x0076, {1, {0x0056 }}}, + { 0x0077, {1, {0x0057 }}}, + { 0x0078, {1, {0x0058 }}}, + { 0x0079, {1, {0x0059 }}}, + { 0x007a, {1, {0x005a }}}, + { 0x00e0, {1, {0x00c0 }}}, + { 0x00e1, {1, {0x00c1 }}}, + { 0x00e2, {1, {0x00c2 }}}, + { 0x00e3, {1, {0x00c3 }}}, + { 0x00e4, {1, {0x00c4 }}}, + { 0x00e5, {2, {0x212b, 0x00c5 }}}, + { 0x00e6, {1, {0x00c6 }}}, + { 0x00e7, {1, {0x00c7 }}}, + { 0x00e8, {1, {0x00c8 }}}, + { 0x00e9, {1, {0x00c9 }}}, + { 0x00ea, {1, {0x00ca }}}, + { 0x00eb, {1, {0x00cb }}}, + { 0x00ec, {1, {0x00cc }}}, + { 0x00ed, {1, {0x00cd }}}, + { 0x00ee, {1, {0x00ce }}}, + { 0x00ef, {1, {0x00cf }}}, + { 0x00f0, {1, {0x00d0 }}}, + { 0x00f1, {1, {0x00d1 }}}, + { 0x00f2, {1, {0x00d2 }}}, + { 0x00f3, {1, {0x00d3 }}}, + { 0x00f4, {1, {0x00d4 }}}, + { 0x00f5, {1, {0x00d5 }}}, + { 0x00f6, {1, {0x00d6 }}}, + { 0x00f8, {1, {0x00d8 }}}, + { 0x00f9, {1, {0x00d9 }}}, + { 0x00fa, {1, {0x00da }}}, + { 0x00fb, {1, {0x00db }}}, + { 0x00fc, {1, {0x00dc }}}, + { 0x00fd, {1, {0x00dd }}}, + { 0x00fe, {1, {0x00de }}}, + { 0x00ff, {1, {0x0178 }}}, + { 0x0101, {1, {0x0100 }}}, + { 0x0103, {1, {0x0102 }}}, + { 0x0105, {1, {0x0104 }}}, + { 0x0107, {1, {0x0106 }}}, + { 0x0109, {1, {0x0108 }}}, + { 0x010b, {1, {0x010a }}}, + { 0x010d, {1, {0x010c }}}, + { 0x010f, {1, {0x010e }}}, + { 0x0111, {1, {0x0110 }}}, + { 0x0113, {1, {0x0112 }}}, + { 0x0115, {1, {0x0114 }}}, + { 0x0117, {1, {0x0116 }}}, + { 0x0119, {1, {0x0118 }}}, + { 0x011b, {1, {0x011a }}}, + { 0x011d, {1, {0x011c }}}, + { 0x011f, {1, {0x011e }}}, + { 0x0121, {1, {0x0120 }}}, + { 0x0123, {1, {0x0122 }}}, + { 0x0125, {1, {0x0124 }}}, + { 0x0127, {1, {0x0126 }}}, + { 0x0129, {1, {0x0128 }}}, + { 0x012b, {1, {0x012a }}}, + { 0x012d, {1, {0x012c }}}, + { 0x012f, {1, {0x012e }}}, + { 0x0133, {1, {0x0132 }}}, + { 0x0135, {1, {0x0134 }}}, + { 0x0137, {1, {0x0136 }}}, + { 0x013a, {1, {0x0139 }}}, + { 0x013c, {1, {0x013b }}}, + { 0x013e, {1, {0x013d }}}, + { 0x0140, {1, {0x013f }}}, + { 0x0142, {1, {0x0141 }}}, + { 0x0144, {1, {0x0143 }}}, + { 0x0146, {1, {0x0145 }}}, + { 0x0148, {1, {0x0147 }}}, + { 0x014b, {1, {0x014a }}}, + { 0x014d, {1, {0x014c }}}, + { 0x014f, {1, {0x014e }}}, + { 0x0151, {1, {0x0150 }}}, + { 0x0153, {1, {0x0152 }}}, + { 0x0155, {1, {0x0154 }}}, + { 0x0157, {1, {0x0156 }}}, + { 0x0159, {1, {0x0158 }}}, + { 0x015b, {1, {0x015a }}}, + { 0x015d, {1, {0x015c }}}, + { 0x015f, {1, {0x015e }}}, + { 0x0161, {1, {0x0160 }}}, + { 0x0163, {1, {0x0162 }}}, + { 0x0165, {1, {0x0164 }}}, + { 0x0167, {1, {0x0166 }}}, + { 0x0169, {1, {0x0168 }}}, + { 0x016b, {1, {0x016a }}}, + { 0x016d, {1, {0x016c }}}, + { 0x016f, {1, {0x016e }}}, + { 0x0171, {1, {0x0170 }}}, + { 0x0173, {1, {0x0172 }}}, + { 0x0175, {1, {0x0174 }}}, + { 0x0177, {1, {0x0176 }}}, + { 0x017a, {1, {0x0179 }}}, + { 0x017c, {1, {0x017b }}}, + { 0x017e, {1, {0x017d }}}, + { 0x0183, {1, {0x0182 }}}, + { 0x0185, {1, {0x0184 }}}, + { 0x0188, {1, {0x0187 }}}, + { 0x018c, {1, {0x018b }}}, + { 0x0192, {1, {0x0191 }}}, + { 0x0195, {1, {0x01f6 }}}, + { 0x0199, {1, {0x0198 }}}, + { 0x019a, {1, {0x023d }}}, + { 0x019e, {1, {0x0220 }}}, + { 0x01a1, {1, {0x01a0 }}}, + { 0x01a3, {1, {0x01a2 }}}, + { 0x01a5, {1, {0x01a4 }}}, + { 0x01a8, {1, {0x01a7 }}}, + { 0x01ad, {1, {0x01ac }}}, + { 0x01b0, {1, {0x01af }}}, + { 0x01b4, {1, {0x01b3 }}}, + { 0x01b6, {1, {0x01b5 }}}, + { 0x01b9, {1, {0x01b8 }}}, + { 0x01bd, {1, {0x01bc }}}, + { 0x01bf, {1, {0x01f7 }}}, + { 0x01c6, {2, {0x01c4, 0x01c5 }}}, + { 0x01c9, {2, {0x01c7, 0x01c8 }}}, + { 0x01cc, {2, {0x01ca, 0x01cb }}}, + { 0x01ce, {1, {0x01cd }}}, + { 0x01d0, {1, {0x01cf }}}, + { 0x01d2, {1, {0x01d1 }}}, + { 0x01d4, {1, {0x01d3 }}}, + { 0x01d6, {1, {0x01d5 }}}, + { 0x01d8, {1, {0x01d7 }}}, + { 0x01da, {1, {0x01d9 }}}, + { 0x01dc, {1, {0x01db }}}, + { 0x01dd, {1, {0x018e }}}, + { 0x01df, {1, {0x01de }}}, + { 0x01e1, {1, {0x01e0 }}}, + { 0x01e3, {1, {0x01e2 }}}, + { 0x01e5, {1, {0x01e4 }}}, + { 0x01e7, {1, {0x01e6 }}}, + { 0x01e9, {1, {0x01e8 }}}, + { 0x01eb, {1, {0x01ea }}}, + { 0x01ed, {1, {0x01ec }}}, + { 0x01ef, {1, {0x01ee }}}, + { 0x01f3, {2, {0x01f1, 0x01f2 }}}, + { 0x01f5, {1, {0x01f4 }}}, + { 0x01f9, {1, {0x01f8 }}}, + { 0x01fb, {1, {0x01fa }}}, + { 0x01fd, {1, {0x01fc }}}, + { 0x01ff, {1, {0x01fe }}}, + { 0x0201, {1, {0x0200 }}}, + { 0x0203, {1, {0x0202 }}}, + { 0x0205, {1, {0x0204 }}}, + { 0x0207, {1, {0x0206 }}}, + { 0x0209, {1, {0x0208 }}}, + { 0x020b, {1, {0x020a }}}, + { 0x020d, {1, {0x020c }}}, + { 0x020f, {1, {0x020e }}}, + { 0x0211, {1, {0x0210 }}}, + { 0x0213, {1, {0x0212 }}}, + { 0x0215, {1, {0x0214 }}}, + { 0x0217, {1, {0x0216 }}}, + { 0x0219, {1, {0x0218 }}}, + { 0x021b, {1, {0x021a }}}, + { 0x021d, {1, {0x021c }}}, + { 0x021f, {1, {0x021e }}}, + { 0x0223, {1, {0x0222 }}}, + { 0x0225, {1, {0x0224 }}}, + { 0x0227, {1, {0x0226 }}}, + { 0x0229, {1, {0x0228 }}}, + { 0x022b, {1, {0x022a }}}, + { 0x022d, {1, {0x022c }}}, + { 0x022f, {1, {0x022e }}}, + { 0x0231, {1, {0x0230 }}}, + { 0x0233, {1, {0x0232 }}}, + { 0x023c, {1, {0x023b }}}, + { 0x0253, {1, {0x0181 }}}, + { 0x0254, {1, {0x0186 }}}, + { 0x0256, {1, {0x0189 }}}, + { 0x0257, {1, {0x018a }}}, + { 0x0259, {1, {0x018f }}}, + { 0x025b, {1, {0x0190 }}}, + { 0x0260, {1, {0x0193 }}}, + { 0x0263, {1, {0x0194 }}}, + { 0x0268, {1, {0x0197 }}}, + { 0x0269, {1, {0x0196 }}}, + { 0x026f, {1, {0x019c }}}, + { 0x0272, {1, {0x019d }}}, + { 0x0275, {1, {0x019f }}}, + { 0x0280, {1, {0x01a6 }}}, + { 0x0283, {1, {0x01a9 }}}, + { 0x0288, {1, {0x01ae }}}, + { 0x028a, {1, {0x01b1 }}}, + { 0x028b, {1, {0x01b2 }}}, + { 0x0292, {1, {0x01b7 }}}, + { 0x0294, {1, {0x0241 }}}, + { 0x03ac, {1, {0x0386 }}}, + { 0x03ad, {1, {0x0388 }}}, + { 0x03ae, {1, {0x0389 }}}, + { 0x03af, {1, {0x038a }}}, + { 0x03b1, {1, {0x0391 }}}, + { 0x03b2, {2, {0x0392, 0x03d0 }}}, + { 0x03b3, {1, {0x0393 }}}, + { 0x03b4, {1, {0x0394 }}}, + { 0x03b5, {2, {0x03f5, 0x0395 }}}, + { 0x03b6, {1, {0x0396 }}}, + { 0x03b7, {1, {0x0397 }}}, + { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}}, + { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}}, + { 0x03ba, {2, {0x03f0, 0x039a }}}, + { 0x03bb, {1, {0x039b }}}, + { 0x03bc, {2, {0x00b5, 0x039c }}}, + { 0x03bd, {1, {0x039d }}}, + { 0x03be, {1, {0x039e }}}, + { 0x03bf, {1, {0x039f }}}, + { 0x03c0, {2, {0x03a0, 0x03d6 }}}, + { 0x03c1, {2, {0x03f1, 0x03a1 }}}, + { 0x03c3, {2, {0x03a3, 0x03c2 }}}, + { 0x03c4, {1, {0x03a4 }}}, + { 0x03c5, {1, {0x03a5 }}}, + { 0x03c6, {2, {0x03a6, 0x03d5 }}}, + { 0x03c7, {1, {0x03a7 }}}, + { 0x03c8, {1, {0x03a8 }}}, + { 0x03c9, {2, {0x03a9, 0x2126 }}}, + { 0x03ca, {1, {0x03aa }}}, + { 0x03cb, {1, {0x03ab }}}, + { 0x03cc, {1, {0x038c }}}, + { 0x03cd, {1, {0x038e }}}, + { 0x03ce, {1, {0x038f }}}, + { 0x03d9, {1, {0x03d8 }}}, + { 0x03db, {1, {0x03da }}}, + { 0x03dd, {1, {0x03dc }}}, + { 0x03df, {1, {0x03de }}}, + { 0x03e1, {1, {0x03e0 }}}, + { 0x03e3, {1, {0x03e2 }}}, + { 0x03e5, {1, {0x03e4 }}}, + { 0x03e7, {1, {0x03e6 }}}, + { 0x03e9, {1, {0x03e8 }}}, + { 0x03eb, {1, {0x03ea }}}, + { 0x03ed, {1, {0x03ec }}}, + { 0x03ef, {1, {0x03ee }}}, + { 0x03f2, {1, {0x03f9 }}}, + { 0x03f8, {1, {0x03f7 }}}, + { 0x03fb, {1, {0x03fa }}}, + { 0x0430, {1, {0x0410 }}}, + { 0x0431, {1, {0x0411 }}}, + { 0x0432, {1, {0x0412 }}}, + { 0x0433, {1, {0x0413 }}}, + { 0x0434, {1, {0x0414 }}}, + { 0x0435, {1, {0x0415 }}}, + { 0x0436, {1, {0x0416 }}}, + { 0x0437, {1, {0x0417 }}}, + { 0x0438, {1, {0x0418 }}}, + { 0x0439, {1, {0x0419 }}}, + { 0x043a, {1, {0x041a }}}, + { 0x043b, {1, {0x041b }}}, + { 0x043c, {1, {0x041c }}}, + { 0x043d, {1, {0x041d }}}, + { 0x043e, {1, {0x041e }}}, + { 0x043f, {1, {0x041f }}}, + { 0x0440, {1, {0x0420 }}}, + { 0x0441, {1, {0x0421 }}}, + { 0x0442, {1, {0x0422 }}}, + { 0x0443, {1, {0x0423 }}}, + { 0x0444, {1, {0x0424 }}}, + { 0x0445, {1, {0x0425 }}}, + { 0x0446, {1, {0x0426 }}}, + { 0x0447, {1, {0x0427 }}}, + { 0x0448, {1, {0x0428 }}}, + { 0x0449, {1, {0x0429 }}}, + { 0x044a, {1, {0x042a }}}, + { 0x044b, {1, {0x042b }}}, + { 0x044c, {1, {0x042c }}}, + { 0x044d, {1, {0x042d }}}, + { 0x044e, {1, {0x042e }}}, + { 0x044f, {1, {0x042f }}}, + { 0x0450, {1, {0x0400 }}}, + { 0x0451, {1, {0x0401 }}}, + { 0x0452, {1, {0x0402 }}}, + { 0x0453, {1, {0x0403 }}}, + { 0x0454, {1, {0x0404 }}}, + { 0x0455, {1, {0x0405 }}}, + { 0x0456, {1, {0x0406 }}}, + { 0x0457, {1, {0x0407 }}}, + { 0x0458, {1, {0x0408 }}}, + { 0x0459, {1, {0x0409 }}}, + { 0x045a, {1, {0x040a }}}, + { 0x045b, {1, {0x040b }}}, + { 0x045c, {1, {0x040c }}}, + { 0x045d, {1, {0x040d }}}, + { 0x045e, {1, {0x040e }}}, + { 0x045f, {1, {0x040f }}}, + { 0x0461, {1, {0x0460 }}}, + { 0x0463, {1, {0x0462 }}}, + { 0x0465, {1, {0x0464 }}}, + { 0x0467, {1, {0x0466 }}}, + { 0x0469, {1, {0x0468 }}}, + { 0x046b, {1, {0x046a }}}, + { 0x046d, {1, {0x046c }}}, + { 0x046f, {1, {0x046e }}}, + { 0x0471, {1, {0x0470 }}}, + { 0x0473, {1, {0x0472 }}}, + { 0x0475, {1, {0x0474 }}}, + { 0x0477, {1, {0x0476 }}}, + { 0x0479, {1, {0x0478 }}}, + { 0x047b, {1, {0x047a }}}, + { 0x047d, {1, {0x047c }}}, + { 0x047f, {1, {0x047e }}}, + { 0x0481, {1, {0x0480 }}}, + { 0x048b, {1, {0x048a }}}, + { 0x048d, {1, {0x048c }}}, + { 0x048f, {1, {0x048e }}}, + { 0x0491, {1, {0x0490 }}}, + { 0x0493, {1, {0x0492 }}}, + { 0x0495, {1, {0x0494 }}}, + { 0x0497, {1, {0x0496 }}}, + { 0x0499, {1, {0x0498 }}}, + { 0x049b, {1, {0x049a }}}, + { 0x049d, {1, {0x049c }}}, + { 0x049f, {1, {0x049e }}}, + { 0x04a1, {1, {0x04a0 }}}, + { 0x04a3, {1, {0x04a2 }}}, + { 0x04a5, {1, {0x04a4 }}}, + { 0x04a7, {1, {0x04a6 }}}, + { 0x04a9, {1, {0x04a8 }}}, + { 0x04ab, {1, {0x04aa }}}, + { 0x04ad, {1, {0x04ac }}}, + { 0x04af, {1, {0x04ae }}}, + { 0x04b1, {1, {0x04b0 }}}, + { 0x04b3, {1, {0x04b2 }}}, + { 0x04b5, {1, {0x04b4 }}}, + { 0x04b7, {1, {0x04b6 }}}, + { 0x04b9, {1, {0x04b8 }}}, + { 0x04bb, {1, {0x04ba }}}, + { 0x04bd, {1, {0x04bc }}}, + { 0x04bf, {1, {0x04be }}}, + { 0x04c2, {1, {0x04c1 }}}, + { 0x04c4, {1, {0x04c3 }}}, + { 0x04c6, {1, {0x04c5 }}}, + { 0x04c8, {1, {0x04c7 }}}, + { 0x04ca, {1, {0x04c9 }}}, + { 0x04cc, {1, {0x04cb }}}, + { 0x04ce, {1, {0x04cd }}}, + { 0x04d1, {1, {0x04d0 }}}, + { 0x04d3, {1, {0x04d2 }}}, + { 0x04d5, {1, {0x04d4 }}}, + { 0x04d7, {1, {0x04d6 }}}, + { 0x04d9, {1, {0x04d8 }}}, + { 0x04db, {1, {0x04da }}}, + { 0x04dd, {1, {0x04dc }}}, + { 0x04df, {1, {0x04de }}}, + { 0x04e1, {1, {0x04e0 }}}, + { 0x04e3, {1, {0x04e2 }}}, + { 0x04e5, {1, {0x04e4 }}}, + { 0x04e7, {1, {0x04e6 }}}, + { 0x04e9, {1, {0x04e8 }}}, + { 0x04eb, {1, {0x04ea }}}, + { 0x04ed, {1, {0x04ec }}}, + { 0x04ef, {1, {0x04ee }}}, + { 0x04f1, {1, {0x04f0 }}}, + { 0x04f3, {1, {0x04f2 }}}, + { 0x04f5, {1, {0x04f4 }}}, + { 0x04f7, {1, {0x04f6 }}}, + { 0x04f9, {1, {0x04f8 }}}, + { 0x0501, {1, {0x0500 }}}, + { 0x0503, {1, {0x0502 }}}, + { 0x0505, {1, {0x0504 }}}, + { 0x0507, {1, {0x0506 }}}, + { 0x0509, {1, {0x0508 }}}, + { 0x050b, {1, {0x050a }}}, + { 0x050d, {1, {0x050c }}}, + { 0x050f, {1, {0x050e }}}, + { 0x0561, {1, {0x0531 }}}, + { 0x0562, {1, {0x0532 }}}, + { 0x0563, {1, {0x0533 }}}, + { 0x0564, {1, {0x0534 }}}, + { 0x0565, {1, {0x0535 }}}, + { 0x0566, {1, {0x0536 }}}, + { 0x0567, {1, {0x0537 }}}, + { 0x0568, {1, {0x0538 }}}, + { 0x0569, {1, {0x0539 }}}, + { 0x056a, {1, {0x053a }}}, + { 0x056b, {1, {0x053b }}}, + { 0x056c, {1, {0x053c }}}, + { 0x056d, {1, {0x053d }}}, + { 0x056e, {1, {0x053e }}}, + { 0x056f, {1, {0x053f }}}, + { 0x0570, {1, {0x0540 }}}, + { 0x0571, {1, {0x0541 }}}, + { 0x0572, {1, {0x0542 }}}, + { 0x0573, {1, {0x0543 }}}, + { 0x0574, {1, {0x0544 }}}, + { 0x0575, {1, {0x0545 }}}, + { 0x0576, {1, {0x0546 }}}, + { 0x0577, {1, {0x0547 }}}, + { 0x0578, {1, {0x0548 }}}, + { 0x0579, {1, {0x0549 }}}, + { 0x057a, {1, {0x054a }}}, + { 0x057b, {1, {0x054b }}}, + { 0x057c, {1, {0x054c }}}, + { 0x057d, {1, {0x054d }}}, + { 0x057e, {1, {0x054e }}}, + { 0x057f, {1, {0x054f }}}, + { 0x0580, {1, {0x0550 }}}, + { 0x0581, {1, {0x0551 }}}, + { 0x0582, {1, {0x0552 }}}, + { 0x0583, {1, {0x0553 }}}, + { 0x0584, {1, {0x0554 }}}, + { 0x0585, {1, {0x0555 }}}, + { 0x0586, {1, {0x0556 }}}, + { 0x1e01, {1, {0x1e00 }}}, + { 0x1e03, {1, {0x1e02 }}}, + { 0x1e05, {1, {0x1e04 }}}, + { 0x1e07, {1, {0x1e06 }}}, + { 0x1e09, {1, {0x1e08 }}}, + { 0x1e0b, {1, {0x1e0a }}}, + { 0x1e0d, {1, {0x1e0c }}}, + { 0x1e0f, {1, {0x1e0e }}}, + { 0x1e11, {1, {0x1e10 }}}, + { 0x1e13, {1, {0x1e12 }}}, + { 0x1e15, {1, {0x1e14 }}}, + { 0x1e17, {1, {0x1e16 }}}, + { 0x1e19, {1, {0x1e18 }}}, + { 0x1e1b, {1, {0x1e1a }}}, + { 0x1e1d, {1, {0x1e1c }}}, + { 0x1e1f, {1, {0x1e1e }}}, + { 0x1e21, {1, {0x1e20 }}}, + { 0x1e23, {1, {0x1e22 }}}, + { 0x1e25, {1, {0x1e24 }}}, + { 0x1e27, {1, {0x1e26 }}}, + { 0x1e29, {1, {0x1e28 }}}, + { 0x1e2b, {1, {0x1e2a }}}, + { 0x1e2d, {1, {0x1e2c }}}, + { 0x1e2f, {1, {0x1e2e }}}, + { 0x1e31, {1, {0x1e30 }}}, + { 0x1e33, {1, {0x1e32 }}}, + { 0x1e35, {1, {0x1e34 }}}, + { 0x1e37, {1, {0x1e36 }}}, + { 0x1e39, {1, {0x1e38 }}}, + { 0x1e3b, {1, {0x1e3a }}}, + { 0x1e3d, {1, {0x1e3c }}}, + { 0x1e3f, {1, {0x1e3e }}}, + { 0x1e41, {1, {0x1e40 }}}, + { 0x1e43, {1, {0x1e42 }}}, + { 0x1e45, {1, {0x1e44 }}}, + { 0x1e47, {1, {0x1e46 }}}, + { 0x1e49, {1, {0x1e48 }}}, + { 0x1e4b, {1, {0x1e4a }}}, + { 0x1e4d, {1, {0x1e4c }}}, + { 0x1e4f, {1, {0x1e4e }}}, + { 0x1e51, {1, {0x1e50 }}}, + { 0x1e53, {1, {0x1e52 }}}, + { 0x1e55, {1, {0x1e54 }}}, + { 0x1e57, {1, {0x1e56 }}}, + { 0x1e59, {1, {0x1e58 }}}, + { 0x1e5b, {1, {0x1e5a }}}, + { 0x1e5d, {1, {0x1e5c }}}, + { 0x1e5f, {1, {0x1e5e }}}, + { 0x1e61, {2, {0x1e9b, 0x1e60 }}}, + { 0x1e63, {1, {0x1e62 }}}, + { 0x1e65, {1, {0x1e64 }}}, + { 0x1e67, {1, {0x1e66 }}}, + { 0x1e69, {1, {0x1e68 }}}, + { 0x1e6b, {1, {0x1e6a }}}, + { 0x1e6d, {1, {0x1e6c }}}, + { 0x1e6f, {1, {0x1e6e }}}, + { 0x1e71, {1, {0x1e70 }}}, + { 0x1e73, {1, {0x1e72 }}}, + { 0x1e75, {1, {0x1e74 }}}, + { 0x1e77, {1, {0x1e76 }}}, + { 0x1e79, {1, {0x1e78 }}}, + { 0x1e7b, {1, {0x1e7a }}}, + { 0x1e7d, {1, {0x1e7c }}}, + { 0x1e7f, {1, {0x1e7e }}}, + { 0x1e81, {1, {0x1e80 }}}, + { 0x1e83, {1, {0x1e82 }}}, + { 0x1e85, {1, {0x1e84 }}}, + { 0x1e87, {1, {0x1e86 }}}, + { 0x1e89, {1, {0x1e88 }}}, + { 0x1e8b, {1, {0x1e8a }}}, + { 0x1e8d, {1, {0x1e8c }}}, + { 0x1e8f, {1, {0x1e8e }}}, + { 0x1e91, {1, {0x1e90 }}}, + { 0x1e93, {1, {0x1e92 }}}, + { 0x1e95, {1, {0x1e94 }}}, + { 0x1ea1, {1, {0x1ea0 }}}, + { 0x1ea3, {1, {0x1ea2 }}}, + { 0x1ea5, {1, {0x1ea4 }}}, + { 0x1ea7, {1, {0x1ea6 }}}, + { 0x1ea9, {1, {0x1ea8 }}}, + { 0x1eab, {1, {0x1eaa }}}, + { 0x1ead, {1, {0x1eac }}}, + { 0x1eaf, {1, {0x1eae }}}, + { 0x1eb1, {1, {0x1eb0 }}}, + { 0x1eb3, {1, {0x1eb2 }}}, + { 0x1eb5, {1, {0x1eb4 }}}, + { 0x1eb7, {1, {0x1eb6 }}}, + { 0x1eb9, {1, {0x1eb8 }}}, + { 0x1ebb, {1, {0x1eba }}}, + { 0x1ebd, {1, {0x1ebc }}}, + { 0x1ebf, {1, {0x1ebe }}}, + { 0x1ec1, {1, {0x1ec0 }}}, + { 0x1ec3, {1, {0x1ec2 }}}, + { 0x1ec5, {1, {0x1ec4 }}}, + { 0x1ec7, {1, {0x1ec6 }}}, + { 0x1ec9, {1, {0x1ec8 }}}, + { 0x1ecb, {1, {0x1eca }}}, + { 0x1ecd, {1, {0x1ecc }}}, + { 0x1ecf, {1, {0x1ece }}}, + { 0x1ed1, {1, {0x1ed0 }}}, + { 0x1ed3, {1, {0x1ed2 }}}, + { 0x1ed5, {1, {0x1ed4 }}}, + { 0x1ed7, {1, {0x1ed6 }}}, + { 0x1ed9, {1, {0x1ed8 }}}, + { 0x1edb, {1, {0x1eda }}}, + { 0x1edd, {1, {0x1edc }}}, + { 0x1edf, {1, {0x1ede }}}, + { 0x1ee1, {1, {0x1ee0 }}}, + { 0x1ee3, {1, {0x1ee2 }}}, + { 0x1ee5, {1, {0x1ee4 }}}, + { 0x1ee7, {1, {0x1ee6 }}}, + { 0x1ee9, {1, {0x1ee8 }}}, + { 0x1eeb, {1, {0x1eea }}}, + { 0x1eed, {1, {0x1eec }}}, + { 0x1eef, {1, {0x1eee }}}, + { 0x1ef1, {1, {0x1ef0 }}}, + { 0x1ef3, {1, {0x1ef2 }}}, + { 0x1ef5, {1, {0x1ef4 }}}, + { 0x1ef7, {1, {0x1ef6 }}}, + { 0x1ef9, {1, {0x1ef8 }}}, + { 0x1f00, {1, {0x1f08 }}}, + { 0x1f01, {1, {0x1f09 }}}, + { 0x1f02, {1, {0x1f0a }}}, + { 0x1f03, {1, {0x1f0b }}}, + { 0x1f04, {1, {0x1f0c }}}, + { 0x1f05, {1, {0x1f0d }}}, + { 0x1f06, {1, {0x1f0e }}}, + { 0x1f07, {1, {0x1f0f }}}, + { 0x1f10, {1, {0x1f18 }}}, + { 0x1f11, {1, {0x1f19 }}}, + { 0x1f12, {1, {0x1f1a }}}, + { 0x1f13, {1, {0x1f1b }}}, + { 0x1f14, {1, {0x1f1c }}}, + { 0x1f15, {1, {0x1f1d }}}, + { 0x1f20, {1, {0x1f28 }}}, + { 0x1f21, {1, {0x1f29 }}}, + { 0x1f22, {1, {0x1f2a }}}, + { 0x1f23, {1, {0x1f2b }}}, + { 0x1f24, {1, {0x1f2c }}}, + { 0x1f25, {1, {0x1f2d }}}, + { 0x1f26, {1, {0x1f2e }}}, + { 0x1f27, {1, {0x1f2f }}}, + { 0x1f30, {1, {0x1f38 }}}, + { 0x1f31, {1, {0x1f39 }}}, + { 0x1f32, {1, {0x1f3a }}}, + { 0x1f33, {1, {0x1f3b }}}, + { 0x1f34, {1, {0x1f3c }}}, + { 0x1f35, {1, {0x1f3d }}}, + { 0x1f36, {1, {0x1f3e }}}, + { 0x1f37, {1, {0x1f3f }}}, + { 0x1f40, {1, {0x1f48 }}}, + { 0x1f41, {1, {0x1f49 }}}, + { 0x1f42, {1, {0x1f4a }}}, + { 0x1f43, {1, {0x1f4b }}}, + { 0x1f44, {1, {0x1f4c }}}, + { 0x1f45, {1, {0x1f4d }}}, + { 0x1f51, {1, {0x1f59 }}}, + { 0x1f53, {1, {0x1f5b }}}, + { 0x1f55, {1, {0x1f5d }}}, + { 0x1f57, {1, {0x1f5f }}}, + { 0x1f60, {1, {0x1f68 }}}, + { 0x1f61, {1, {0x1f69 }}}, + { 0x1f62, {1, {0x1f6a }}}, + { 0x1f63, {1, {0x1f6b }}}, + { 0x1f64, {1, {0x1f6c }}}, + { 0x1f65, {1, {0x1f6d }}}, + { 0x1f66, {1, {0x1f6e }}}, + { 0x1f67, {1, {0x1f6f }}}, + { 0x1f70, {1, {0x1fba }}}, + { 0x1f71, {1, {0x1fbb }}}, + { 0x1f72, {1, {0x1fc8 }}}, + { 0x1f73, {1, {0x1fc9 }}}, + { 0x1f74, {1, {0x1fca }}}, + { 0x1f75, {1, {0x1fcb }}}, + { 0x1f76, {1, {0x1fda }}}, + { 0x1f77, {1, {0x1fdb }}}, + { 0x1f78, {1, {0x1ff8 }}}, + { 0x1f79, {1, {0x1ff9 }}}, + { 0x1f7a, {1, {0x1fea }}}, + { 0x1f7b, {1, {0x1feb }}}, + { 0x1f7c, {1, {0x1ffa }}}, + { 0x1f7d, {1, {0x1ffb }}}, + { 0x1fb0, {1, {0x1fb8 }}}, + { 0x1fb1, {1, {0x1fb9 }}}, + { 0x1fd0, {1, {0x1fd8 }}}, + { 0x1fd1, {1, {0x1fd9 }}}, + { 0x1fe0, {1, {0x1fe8 }}}, + { 0x1fe1, {1, {0x1fe9 }}}, + { 0x1fe5, {1, {0x1fec }}}, + { 0x2170, {1, {0x2160 }}}, + { 0x2171, {1, {0x2161 }}}, + { 0x2172, {1, {0x2162 }}}, + { 0x2173, {1, {0x2163 }}}, + { 0x2174, {1, {0x2164 }}}, + { 0x2175, {1, {0x2165 }}}, + { 0x2176, {1, {0x2166 }}}, + { 0x2177, {1, {0x2167 }}}, + { 0x2178, {1, {0x2168 }}}, + { 0x2179, {1, {0x2169 }}}, + { 0x217a, {1, {0x216a }}}, + { 0x217b, {1, {0x216b }}}, + { 0x217c, {1, {0x216c }}}, + { 0x217d, {1, {0x216d }}}, + { 0x217e, {1, {0x216e }}}, + { 0x217f, {1, {0x216f }}}, + { 0x24d0, {1, {0x24b6 }}}, + { 0x24d1, {1, {0x24b7 }}}, + { 0x24d2, {1, {0x24b8 }}}, + { 0x24d3, {1, {0x24b9 }}}, + { 0x24d4, {1, {0x24ba }}}, + { 0x24d5, {1, {0x24bb }}}, + { 0x24d6, {1, {0x24bc }}}, + { 0x24d7, {1, {0x24bd }}}, + { 0x24d8, {1, {0x24be }}}, + { 0x24d9, {1, {0x24bf }}}, + { 0x24da, {1, {0x24c0 }}}, + { 0x24db, {1, {0x24c1 }}}, + { 0x24dc, {1, {0x24c2 }}}, + { 0x24dd, {1, {0x24c3 }}}, + { 0x24de, {1, {0x24c4 }}}, + { 0x24df, {1, {0x24c5 }}}, + { 0x24e0, {1, {0x24c6 }}}, + { 0x24e1, {1, {0x24c7 }}}, + { 0x24e2, {1, {0x24c8 }}}, + { 0x24e3, {1, {0x24c9 }}}, + { 0x24e4, {1, {0x24ca }}}, + { 0x24e5, {1, {0x24cb }}}, + { 0x24e6, {1, {0x24cc }}}, + { 0x24e7, {1, {0x24cd }}}, + { 0x24e8, {1, {0x24ce }}}, + { 0x24e9, {1, {0x24cf }}}, + { 0x2c30, {1, {0x2c00 }}}, + { 0x2c31, {1, {0x2c01 }}}, + { 0x2c32, {1, {0x2c02 }}}, + { 0x2c33, {1, {0x2c03 }}}, + { 0x2c34, {1, {0x2c04 }}}, + { 0x2c35, {1, {0x2c05 }}}, + { 0x2c36, {1, {0x2c06 }}}, + { 0x2c37, {1, {0x2c07 }}}, + { 0x2c38, {1, {0x2c08 }}}, + { 0x2c39, {1, {0x2c09 }}}, + { 0x2c3a, {1, {0x2c0a }}}, + { 0x2c3b, {1, {0x2c0b }}}, + { 0x2c3c, {1, {0x2c0c }}}, + { 0x2c3d, {1, {0x2c0d }}}, + { 0x2c3e, {1, {0x2c0e }}}, + { 0x2c3f, {1, {0x2c0f }}}, + { 0x2c40, {1, {0x2c10 }}}, + { 0x2c41, {1, {0x2c11 }}}, + { 0x2c42, {1, {0x2c12 }}}, + { 0x2c43, {1, {0x2c13 }}}, + { 0x2c44, {1, {0x2c14 }}}, + { 0x2c45, {1, {0x2c15 }}}, + { 0x2c46, {1, {0x2c16 }}}, + { 0x2c47, {1, {0x2c17 }}}, + { 0x2c48, {1, {0x2c18 }}}, + { 0x2c49, {1, {0x2c19 }}}, + { 0x2c4a, {1, {0x2c1a }}}, + { 0x2c4b, {1, {0x2c1b }}}, + { 0x2c4c, {1, {0x2c1c }}}, + { 0x2c4d, {1, {0x2c1d }}}, + { 0x2c4e, {1, {0x2c1e }}}, + { 0x2c4f, {1, {0x2c1f }}}, + { 0x2c50, {1, {0x2c20 }}}, + { 0x2c51, {1, {0x2c21 }}}, + { 0x2c52, {1, {0x2c22 }}}, + { 0x2c53, {1, {0x2c23 }}}, + { 0x2c54, {1, {0x2c24 }}}, + { 0x2c55, {1, {0x2c25 }}}, + { 0x2c56, {1, {0x2c26 }}}, + { 0x2c57, {1, {0x2c27 }}}, + { 0x2c58, {1, {0x2c28 }}}, + { 0x2c59, {1, {0x2c29 }}}, + { 0x2c5a, {1, {0x2c2a }}}, + { 0x2c5b, {1, {0x2c2b }}}, + { 0x2c5c, {1, {0x2c2c }}}, + { 0x2c5d, {1, {0x2c2d }}}, + { 0x2c5e, {1, {0x2c2e }}}, + { 0x2c81, {1, {0x2c80 }}}, + { 0x2c83, {1, {0x2c82 }}}, + { 0x2c85, {1, {0x2c84 }}}, + { 0x2c87, {1, {0x2c86 }}}, + { 0x2c89, {1, {0x2c88 }}}, + { 0x2c8b, {1, {0x2c8a }}}, + { 0x2c8d, {1, {0x2c8c }}}, + { 0x2c8f, {1, {0x2c8e }}}, + { 0x2c91, {1, {0x2c90 }}}, + { 0x2c93, {1, {0x2c92 }}}, + { 0x2c95, {1, {0x2c94 }}}, + { 0x2c97, {1, {0x2c96 }}}, + { 0x2c99, {1, {0x2c98 }}}, + { 0x2c9b, {1, {0x2c9a }}}, + { 0x2c9d, {1, {0x2c9c }}}, + { 0x2c9f, {1, {0x2c9e }}}, + { 0x2ca1, {1, {0x2ca0 }}}, + { 0x2ca3, {1, {0x2ca2 }}}, + { 0x2ca5, {1, {0x2ca4 }}}, + { 0x2ca7, {1, {0x2ca6 }}}, + { 0x2ca9, {1, {0x2ca8 }}}, + { 0x2cab, {1, {0x2caa }}}, + { 0x2cad, {1, {0x2cac }}}, + { 0x2caf, {1, {0x2cae }}}, + { 0x2cb1, {1, {0x2cb0 }}}, + { 0x2cb3, {1, {0x2cb2 }}}, + { 0x2cb5, {1, {0x2cb4 }}}, + { 0x2cb7, {1, {0x2cb6 }}}, + { 0x2cb9, {1, {0x2cb8 }}}, + { 0x2cbb, {1, {0x2cba }}}, + { 0x2cbd, {1, {0x2cbc }}}, + { 0x2cbf, {1, {0x2cbe }}}, + { 0x2cc1, {1, {0x2cc0 }}}, + { 0x2cc3, {1, {0x2cc2 }}}, + { 0x2cc5, {1, {0x2cc4 }}}, + { 0x2cc7, {1, {0x2cc6 }}}, + { 0x2cc9, {1, {0x2cc8 }}}, + { 0x2ccb, {1, {0x2cca }}}, + { 0x2ccd, {1, {0x2ccc }}}, + { 0x2ccf, {1, {0x2cce }}}, + { 0x2cd1, {1, {0x2cd0 }}}, + { 0x2cd3, {1, {0x2cd2 }}}, + { 0x2cd5, {1, {0x2cd4 }}}, + { 0x2cd7, {1, {0x2cd6 }}}, + { 0x2cd9, {1, {0x2cd8 }}}, + { 0x2cdb, {1, {0x2cda }}}, + { 0x2cdd, {1, {0x2cdc }}}, + { 0x2cdf, {1, {0x2cde }}}, + { 0x2ce1, {1, {0x2ce0 }}}, + { 0x2ce3, {1, {0x2ce2 }}}, + { 0x2d00, {1, {0x10a0 }}}, + { 0x2d01, {1, {0x10a1 }}}, + { 0x2d02, {1, {0x10a2 }}}, + { 0x2d03, {1, {0x10a3 }}}, + { 0x2d04, {1, {0x10a4 }}}, + { 0x2d05, {1, {0x10a5 }}}, + { 0x2d06, {1, {0x10a6 }}}, + { 0x2d07, {1, {0x10a7 }}}, + { 0x2d08, {1, {0x10a8 }}}, + { 0x2d09, {1, {0x10a9 }}}, + { 0x2d0a, {1, {0x10aa }}}, + { 0x2d0b, {1, {0x10ab }}}, + { 0x2d0c, {1, {0x10ac }}}, + { 0x2d0d, {1, {0x10ad }}}, + { 0x2d0e, {1, {0x10ae }}}, + { 0x2d0f, {1, {0x10af }}}, + { 0x2d10, {1, {0x10b0 }}}, + { 0x2d11, {1, {0x10b1 }}}, + { 0x2d12, {1, {0x10b2 }}}, + { 0x2d13, {1, {0x10b3 }}}, + { 0x2d14, {1, {0x10b4 }}}, + { 0x2d15, {1, {0x10b5 }}}, + { 0x2d16, {1, {0x10b6 }}}, + { 0x2d17, {1, {0x10b7 }}}, + { 0x2d18, {1, {0x10b8 }}}, + { 0x2d19, {1, {0x10b9 }}}, + { 0x2d1a, {1, {0x10ba }}}, + { 0x2d1b, {1, {0x10bb }}}, + { 0x2d1c, {1, {0x10bc }}}, + { 0x2d1d, {1, {0x10bd }}}, + { 0x2d1e, {1, {0x10be }}}, + { 0x2d1f, {1, {0x10bf }}}, + { 0x2d20, {1, {0x10c0 }}}, + { 0x2d21, {1, {0x10c1 }}}, + { 0x2d22, {1, {0x10c2 }}}, + { 0x2d23, {1, {0x10c3 }}}, + { 0x2d24, {1, {0x10c4 }}}, + { 0x2d25, {1, {0x10c5 }}}, + { 0xff41, {1, {0xff21 }}}, + { 0xff42, {1, {0xff22 }}}, + { 0xff43, {1, {0xff23 }}}, + { 0xff44, {1, {0xff24 }}}, + { 0xff45, {1, {0xff25 }}}, + { 0xff46, {1, {0xff26 }}}, + { 0xff47, {1, {0xff27 }}}, + { 0xff48, {1, {0xff28 }}}, + { 0xff49, {1, {0xff29 }}}, + { 0xff4a, {1, {0xff2a }}}, + { 0xff4b, {1, {0xff2b }}}, + { 0xff4c, {1, {0xff2c }}}, + { 0xff4d, {1, {0xff2d }}}, + { 0xff4e, {1, {0xff2e }}}, + { 0xff4f, {1, {0xff2f }}}, + { 0xff50, {1, {0xff30 }}}, + { 0xff51, {1, {0xff31 }}}, + { 0xff52, {1, {0xff32 }}}, + { 0xff53, {1, {0xff33 }}}, + { 0xff54, {1, {0xff34 }}}, + { 0xff55, {1, {0xff35 }}}, + { 0xff56, {1, {0xff36 }}}, + { 0xff57, {1, {0xff37 }}}, + { 0xff58, {1, {0xff38 }}}, + { 0xff59, {1, {0xff39 }}}, + { 0xff5a, {1, {0xff3a }}}, + { 0x10428, {1, {0x10400 }}}, + { 0x10429, {1, {0x10401 }}}, + { 0x1042a, {1, {0x10402 }}}, + { 0x1042b, {1, {0x10403 }}}, + { 0x1042c, {1, {0x10404 }}}, + { 0x1042d, {1, {0x10405 }}}, + { 0x1042e, {1, {0x10406 }}}, + { 0x1042f, {1, {0x10407 }}}, + { 0x10430, {1, {0x10408 }}}, + { 0x10431, {1, {0x10409 }}}, + { 0x10432, {1, {0x1040a }}}, + { 0x10433, {1, {0x1040b }}}, + { 0x10434, {1, {0x1040c }}}, + { 0x10435, {1, {0x1040d }}}, + { 0x10436, {1, {0x1040e }}}, + { 0x10437, {1, {0x1040f }}}, + { 0x10438, {1, {0x10410 }}}, + { 0x10439, {1, {0x10411 }}}, + { 0x1043a, {1, {0x10412 }}}, + { 0x1043b, {1, {0x10413 }}}, + { 0x1043c, {1, {0x10414 }}}, + { 0x1043d, {1, {0x10415 }}}, + { 0x1043e, {1, {0x10416 }}}, + { 0x1043f, {1, {0x10417 }}}, + { 0x10440, {1, {0x10418 }}}, + { 0x10441, {1, {0x10419 }}}, + { 0x10442, {1, {0x1041a }}}, + { 0x10443, {1, {0x1041b }}}, + { 0x10444, {1, {0x1041c }}}, + { 0x10445, {1, {0x1041d }}}, + { 0x10446, {1, {0x1041e }}}, + { 0x10447, {1, {0x1041f }}}, + { 0x10448, {1, {0x10420 }}}, + { 0x10449, {1, {0x10421 }}}, + { 0x1044a, {1, {0x10422 }}}, + { 0x1044b, {1, {0x10423 }}}, + { 0x1044c, {1, {0x10424 }}}, + { 0x1044d, {1, {0x10425 }}}, + { 0x1044e, {1, {0x10426 }}}, + { 0x1044f, {1, {0x10427 }}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = { + { 0x0069, {1, {0x0049 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12[] = { + { {0x0061, 0x02be}, {1, {0x1e9a }}}, + { {0x0066, 0x0066}, {1, {0xfb00 }}}, + { {0x0066, 0x0069}, {1, {0xfb01 }}}, + { {0x0066, 0x006c}, {1, {0xfb02 }}}, + { {0x0068, 0x0331}, {1, {0x1e96 }}}, + { {0x006a, 0x030c}, {1, {0x01f0 }}}, + { {0x0073, 0x0073}, {1, {0x00df }}}, + { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}}, + { {0x0074, 0x0308}, {1, {0x1e97 }}}, + { {0x0077, 0x030a}, {1, {0x1e98 }}}, + { {0x0079, 0x030a}, {1, {0x1e99 }}}, + { {0x02bc, 0x006e}, {1, {0x0149 }}}, + { {0x03ac, 0x03b9}, {1, {0x1fb4 }}}, + { {0x03ae, 0x03b9}, {1, {0x1fc4 }}}, + { {0x03b1, 0x0342}, {1, {0x1fb6 }}}, + { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}}, + { {0x03b7, 0x0342}, {1, {0x1fc6 }}}, + { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}}, + { {0x03b9, 0x0342}, {1, {0x1fd6 }}}, + { {0x03c1, 0x0313}, {1, {0x1fe4 }}}, + { {0x03c5, 0x0313}, {1, {0x1f50 }}}, + { {0x03c5, 0x0342}, {1, {0x1fe6 }}}, + { {0x03c9, 0x0342}, {1, {0x1ff6 }}}, + { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}}, + { {0x03ce, 0x03b9}, {1, {0x1ff4 }}}, + { {0x0565, 0x0582}, {1, {0x0587 }}}, + { {0x0574, 0x0565}, {1, {0xfb14 }}}, + { {0x0574, 0x056b}, {1, {0xfb15 }}}, + { {0x0574, 0x056d}, {1, {0xfb17 }}}, + { {0x0574, 0x0576}, {1, {0xfb13 }}}, + { {0x057e, 0x0576}, {1, {0xfb16 }}}, + { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}}, + { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}}, + { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}}, + { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}}, + { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}}, + { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}}, + { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}}, + { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}}, + { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}}, + { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}}, + { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}}, + { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}}, + { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}}, + { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}}, + { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}}, + { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}}, + { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}}, + { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}}, + { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}}, + { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}}, + { {0x1f64, 0x03b9}, {2, {0x1fa4, 0x1fac }}}, + { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}}, + { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}}, + { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}}, + { {0x1f70, 0x03b9}, {1, {0x1fb2 }}}, + { {0x1f74, 0x03b9}, {1, {0x1fc2 }}}, + { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = { + { {0x0069, 0x0307}, {1, {0x0130 }}} +}; + +static const CaseUnfold_13_Type CaseUnfold_13[] = { + { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}}, + { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}}, + { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}}, + { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}}, + { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}}, + { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}}, + { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}}, + { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}}, + { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}}, + { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}}, + { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}}, + { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}}, + { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}}, + { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}} +}; + + +static PosixBracketEntryType HashEntryData[] = { + { (UChar* )"NEWLINE", 0, 7 }, + { (UChar* )"Alpha", 1, 5 }, + { (UChar* )"Blank", 2, 5 }, + { (UChar* )"Cntrl", 3, 5 }, + { (UChar* )"Digit", 4, 5 }, + { (UChar* )"Graph", 5, 5 }, + { (UChar* )"Lower", 6, 5 }, + { (UChar* )"Print", 7, 5 }, + { (UChar* )"Punct", 8, 5 }, + { (UChar* )"Space", 9, 5 }, + { (UChar* )"Upper", 10, 5 }, + { (UChar* )"XDigit", 11, 6 }, + { (UChar* )"Word", 12, 4 }, + { (UChar* )"Alnum", 13, 5 }, + { (UChar* )"ASCII", 14, 5 }, + +#ifdef USE_UNICODE_PROPERTIES + { (UChar* )"Any", 15, 3 }, + { (UChar* )"Assigned", 16, 8 }, + { (UChar* )"C", 17, 1 }, + { (UChar* )"Cc", 18, 2 }, + { (UChar* )"Cf", 19, 2 }, + { (UChar* )"Cn", 20, 2 }, + { (UChar* )"Co", 21, 2 }, + { (UChar* )"Cs", 22, 2 }, + { (UChar* )"L", 23, 1 }, + { (UChar* )"Ll", 24, 2 }, + { (UChar* )"Lm", 25, 2 }, + { (UChar* )"Lo", 26, 2 }, + { (UChar* )"Lt", 27, 2 }, + { (UChar* )"Lu", 28, 2 }, + { (UChar* )"M", 29, 1 }, + { (UChar* )"Mc", 30, 2 }, + { (UChar* )"Me", 31, 2 }, + { (UChar* )"Mn", 32, 2 }, + { (UChar* )"N", 33, 1 }, + { (UChar* )"Nd", 34, 2 }, + { (UChar* )"Nl", 35, 2 }, + { (UChar* )"No", 36, 2 }, + { (UChar* )"P", 37, 1 }, + { (UChar* )"Pc", 38, 2 }, + { (UChar* )"Pd", 39, 2 }, + { (UChar* )"Pe", 40, 2 }, + { (UChar* )"Pf", 41, 2 }, + { (UChar* )"Pi", 42, 2 }, + { (UChar* )"Po", 43, 2 }, + { (UChar* )"Ps", 44, 2 }, + { (UChar* )"S", 45, 1 }, + { (UChar* )"Sc", 46, 2 }, + { (UChar* )"Sk", 47, 2 }, + { (UChar* )"Sm", 48, 2 }, + { (UChar* )"So", 49, 2 }, + { (UChar* )"Z", 50, 1 }, + { (UChar* )"Zl", 51, 2 }, + { (UChar* )"Zp", 52, 2 }, + { (UChar* )"Zs", 53, 2 }, + { (UChar* )"Arabic", 54, 6 }, + { (UChar* )"Armenian", 55, 8 }, + { (UChar* )"Bengali", 56, 7 }, + { (UChar* )"Bopomofo", 57, 8 }, + { (UChar* )"Braille", 58, 7 }, + { (UChar* )"Buginese", 59, 8 }, + { (UChar* )"Buhid", 60, 5 }, + { (UChar* )"Canadian_Aboriginal", 61, 19 }, + { (UChar* )"Cherokee", 62, 8 }, + { (UChar* )"Common", 63, 6 }, + { (UChar* )"Coptic", 64, 6 }, + { (UChar* )"Cypriot", 65, 7 }, + { (UChar* )"Cyrillic", 66, 8 }, + { (UChar* )"Deseret", 67, 7 }, + { (UChar* )"Devanagari", 68, 10 }, + { (UChar* )"Ethiopic", 69, 8 }, + { (UChar* )"Georgian", 70, 8 }, + { (UChar* )"Glagolitic", 71, 10 }, + { (UChar* )"Gothic", 72, 6 }, + { (UChar* )"Greek", 73, 5 }, + { (UChar* )"Gujarati", 74, 8 }, + { (UChar* )"Gurmukhi", 75, 8 }, + { (UChar* )"Han", 76, 3 }, + { (UChar* )"Hangul", 77, 6 }, + { (UChar* )"Hanunoo", 78, 7 }, + { (UChar* )"Hebrew", 79, 6 }, + { (UChar* )"Hiragana", 80, 8 }, + { (UChar* )"Inherited", 81, 9 }, + { (UChar* )"Kannada", 82, 7 }, + { (UChar* )"Katakana", 83, 8 }, + { (UChar* )"Kharoshthi", 84, 10 }, + { (UChar* )"Khmer", 85, 5 }, + { (UChar* )"Lao", 86, 3 }, + { (UChar* )"Latin", 87, 5 }, + { (UChar* )"Limbu", 88, 5 }, + { (UChar* )"Linear_B", 89, 8 }, + { (UChar* )"Malayalam", 90, 9 }, + { (UChar* )"Mongolian", 91, 9 }, + { (UChar* )"Myanmar", 92, 7 }, + { (UChar* )"New_Tai_Lue", 93, 11 }, + { (UChar* )"Ogham", 94, 5 }, + { (UChar* )"Old_Italic", 95, 10 }, + { (UChar* )"Old_Persian", 96, 11 }, + { (UChar* )"Oriya", 97, 5 }, + { (UChar* )"Osmanya", 98, 7 }, + { (UChar* )"Runic", 99, 5 }, + { (UChar* )"Shavian", 100, 7 }, + { (UChar* )"Sinhala", 101, 7 }, + { (UChar* )"Syloti_Nagri", 102, 12 }, + { (UChar* )"Syriac", 103, 6 }, + { (UChar* )"Tagalog", 104, 7 }, + { (UChar* )"Tagbanwa", 105, 8 }, + { (UChar* )"Tai_Le", 106, 6 }, + { (UChar* )"Tamil", 107, 5 }, + { (UChar* )"Telugu", 108, 6 }, + { (UChar* )"Thaana", 109, 6 }, + { (UChar* )"Thai", 110, 4 }, + { (UChar* )"Tibetan", 111, 7 }, + { (UChar* )"Tifinagh", 112, 8 }, + { (UChar* )"Ugaritic", 113, 8 }, + { (UChar* )"Yi", 114, 2 }, +#endif /* USE_UNICODE_PROPERTIES */ + { (UChar* )NULL, -1, 0 } +}; + +#ifdef USE_UNICODE_PROPERTIES +#define CODE_RANGES_NUM 115 +#else +#define CODE_RANGES_NUM 15 +#endif + +static const OnigCodePoint* CodeRanges[CODE_RANGES_NUM]; +static int CodeRangeTableInited = 0; + +static void init_code_range_array(void) { + THREAD_ATOMIC_START; + + CodeRanges[0] = CR_NEWLINE; + CodeRanges[1] = CR_Alpha; + CodeRanges[2] = CR_Blank; + CodeRanges[3] = CR_Cntrl; + CodeRanges[4] = CR_Digit; + CodeRanges[5] = CR_Graph; + CodeRanges[6] = CR_Lower; + CodeRanges[7] = CR_Print; + CodeRanges[8] = CR_Punct; + CodeRanges[9] = CR_Space; + CodeRanges[10] = CR_Upper; + CodeRanges[11] = CR_XDigit; + CodeRanges[12] = CR_Word; + CodeRanges[13] = CR_Alnum; + CodeRanges[14] = CR_ASCII; + +#ifdef USE_UNICODE_PROPERTIES + CodeRanges[15] = CR_Any; + CodeRanges[16] = CR_Assigned; + CodeRanges[17] = CR_C; + CodeRanges[18] = CR_Cc; + CodeRanges[19] = CR_Cf; + CodeRanges[20] = CR_Cn; + CodeRanges[21] = CR_Co; + CodeRanges[22] = CR_Cs; + CodeRanges[23] = CR_L; + CodeRanges[24] = CR_Ll; + CodeRanges[25] = CR_Lm; + CodeRanges[26] = CR_Lo; + CodeRanges[27] = CR_Lt; + CodeRanges[28] = CR_Lu; + CodeRanges[29] = CR_M; + CodeRanges[30] = CR_Mc; + CodeRanges[31] = CR_Me; + CodeRanges[32] = CR_Mn; + CodeRanges[33] = CR_N; + CodeRanges[34] = CR_Nd; + CodeRanges[35] = CR_Nl; + CodeRanges[36] = CR_No; + CodeRanges[37] = CR_P; + CodeRanges[38] = CR_Pc; + CodeRanges[39] = CR_Pd; + CodeRanges[40] = CR_Pe; + CodeRanges[41] = CR_Pf; + CodeRanges[42] = CR_Pi; + CodeRanges[43] = CR_Po; + CodeRanges[44] = CR_Ps; + CodeRanges[45] = CR_S; + CodeRanges[46] = CR_Sc; + CodeRanges[47] = CR_Sk; + CodeRanges[48] = CR_Sm; + CodeRanges[49] = CR_So; + CodeRanges[50] = CR_Z; + CodeRanges[51] = CR_Zl; + CodeRanges[52] = CR_Zp; + CodeRanges[53] = CR_Zs; + CodeRanges[54] = CR_Arabic; + CodeRanges[55] = CR_Armenian; + CodeRanges[56] = CR_Bengali; + CodeRanges[57] = CR_Bopomofo; + CodeRanges[58] = CR_Braille; + CodeRanges[59] = CR_Buginese; + CodeRanges[60] = CR_Buhid; + CodeRanges[61] = CR_Canadian_Aboriginal; + CodeRanges[62] = CR_Cherokee; + CodeRanges[63] = CR_Common; + CodeRanges[64] = CR_Coptic; + CodeRanges[65] = CR_Cypriot; + CodeRanges[66] = CR_Cyrillic; + CodeRanges[67] = CR_Deseret; + CodeRanges[68] = CR_Devanagari; + CodeRanges[69] = CR_Ethiopic; + CodeRanges[70] = CR_Georgian; + CodeRanges[71] = CR_Glagolitic; + CodeRanges[72] = CR_Gothic; + CodeRanges[73] = CR_Greek; + CodeRanges[74] = CR_Gujarati; + CodeRanges[75] = CR_Gurmukhi; + CodeRanges[76] = CR_Han; + CodeRanges[77] = CR_Hangul; + CodeRanges[78] = CR_Hanunoo; + CodeRanges[79] = CR_Hebrew; + CodeRanges[80] = CR_Hiragana; + CodeRanges[81] = CR_Inherited; + CodeRanges[82] = CR_Kannada; + CodeRanges[83] = CR_Katakana; + CodeRanges[84] = CR_Kharoshthi; + CodeRanges[85] = CR_Khmer; + CodeRanges[86] = CR_Lao; + CodeRanges[87] = CR_Latin; + CodeRanges[88] = CR_Limbu; + CodeRanges[89] = CR_Linear_B; + CodeRanges[90] = CR_Malayalam; + CodeRanges[91] = CR_Mongolian; + CodeRanges[92] = CR_Myanmar; + CodeRanges[93] = CR_New_Tai_Lue; + CodeRanges[94] = CR_Ogham; + CodeRanges[95] = CR_Old_Italic; + CodeRanges[96] = CR_Old_Persian; + CodeRanges[97] = CR_Oriya; + CodeRanges[98] = CR_Osmanya; + CodeRanges[99] = CR_Runic; + CodeRanges[100] = CR_Shavian; + CodeRanges[101] = CR_Sinhala; + CodeRanges[102] = CR_Syloti_Nagri; + CodeRanges[103] = CR_Syriac; + CodeRanges[104] = CR_Tagalog; + CodeRanges[105] = CR_Tagbanwa; + CodeRanges[106] = CR_Tai_Le; + CodeRanges[107] = CR_Tamil; + CodeRanges[108] = CR_Telugu; + CodeRanges[109] = CR_Thaana; + CodeRanges[110] = CR_Thai; + CodeRanges[111] = CR_Tibetan; + CodeRanges[112] = CR_Tifinagh; + CodeRanges[113] = CR_Ugaritic; + CodeRanges[114] = CR_Yi; +#endif /* USE_UNICODE_PROPERTIES */ + + CodeRangeTableInited = 1; + THREAD_ATOMIC_END; +} + +extern int +onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if ( +#ifdef USE_UNICODE_PROPERTIES + ctype <= ONIGENC_MAX_STD_CTYPE && +#endif + code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + + if (ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + if (CodeRangeTableInited == 0) init_code_range_array(); + + return onig_is_in_code_range((UChar* )CodeRanges[ctype], code); +} + + +extern int +onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) +{ + if (ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + if (CodeRangeTableInited == 0) init_code_range_array(); + + *ranges = CodeRanges[ctype]; + + return 0; +} + +extern int +onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[]) +{ + *sb_out = 0x00; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + +#include "st.h" + +#define PROPERTY_NAME_MAX_SIZE 20 + +static st_table* NameCtypeTable; +static int NameTableInited = 0; + +static int init_name_ctype_table(void) +{ + PosixBracketEntryType *pb; + + THREAD_ATOMIC_START; + + NameCtypeTable = onig_st_init_strend_table_with_size(100); + if (ONIG_IS_NULL(NameCtypeTable)) return ONIGERR_MEMORY; + + for (pb = HashEntryData; ONIG_IS_NOT_NULL(pb->name); pb++) { + onig_st_insert_strend(NameCtypeTable, pb->name, pb->name + pb->len, + (st_data_t )pb->ctype); + } + + NameTableInited = 1; + THREAD_ATOMIC_END; + return 0; +} + +extern int +onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) +{ + int len; + hash_data_type ctype; + UChar buf[PROPERTY_NAME_MAX_SIZE]; + UChar *p; + OnigCodePoint code; + + p = name; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + buf[len++] = (UChar )code; + if (len >= PROPERTY_NAME_MAX_SIZE) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + p += enclen(enc, p); + } + + buf[len] = 0; + + if (NameTableInited == 0) init_name_ctype_table(); + + if (onig_st_lookup_strend(NameCtypeTable, buf, buf + len, &ctype) == 0) { + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + } + + return (int )ctype; +} + + +static int +code2_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1]) return 0; + return 1; +} + +static int +code2_hash(OnigCodePoint* x) +{ + return (int )(x[0] + x[1]); +} + +static struct st_hash_type type_code2_hash = { + code2_cmp, + code2_hash, +}; + +static int +code3_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0; + return 1; +} + +static int +code3_hash(OnigCodePoint* x) +{ + return (int )(x[0] + x[1] + x[2]); +} + +static struct st_hash_type type_code3_hash = { + code3_cmp, + code3_hash, +}; + + +static st_table* FoldTable; /* fold-1, fold-2, fold-3 */ +static st_table* Unfold1Table; +static st_table* Unfold2Table; +static st_table* Unfold3Table; +static int CaseFoldInited = 0; + +static int init_case_fold_table(void) +{ + const CaseFold_11_Type *p; + const CaseUnfold_11_Type *p1; + const CaseUnfold_12_Type *p2; + const CaseUnfold_13_Type *p3; + int i; + + THREAD_ATOMIC_START; + + FoldTable = st_init_numtable_with_size(1200); + if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY; + for (i = 0; i < (int )(sizeof(CaseFold)/sizeof(CaseFold_11_Type)); i++) { + p = &CaseFold[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + for (i = 0; i < (int )(sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type)); + i++) { + p = &CaseFold_Locale[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + + Unfold1Table = st_init_numtable_with_size(1000); + if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY; + + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { + p1 = &CaseUnfold_11[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); + i++) { + p1 = &CaseUnfold_11_Locale[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + + Unfold2Table = st_init_table_with_size(&type_code2_hash, 200); + if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY; + + for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { + p2 = &CaseUnfold_12[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); + i++) { + p2 = &CaseUnfold_12_Locale[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + + Unfold3Table = st_init_table_with_size(&type_code3_hash, 30); + if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY; + + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { + p3 = &CaseUnfold_13[i]; + st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to)); + } + + CaseFoldInited = 1; + THREAD_ATOMIC_END; + return 0; +} + +extern int +onigenc_unicode_mbc_case_fold(OnigEncoding enc, + OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end, + UChar* fold) +{ + CodePointList3 *to; + OnigCodePoint code; + int i, len, rlen; + const UChar *p = *pp; + + if (CaseFoldInited == 0) init_case_fold_table(); + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p); + *pp += len; + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (code == 0x0049) { + return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold); + } + else if (code == 0x0130) { + return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold); + } + } +#endif + + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold); + } +#if 0 + /* NO NEEDS TO CHECK */ + else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { +#else + else { +#endif + rlen = 0; + for (i = 0; i < to->n; i++) { + len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold); + fold += len; + rlen += len; + } + return rlen; + } + } + + for (i = 0; i < len; i++) { + *fold++ = *p++; + } + return len; +} + +extern int +onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + const CaseUnfold_11_Type* p11; + OnigCodePoint code; + int i, j, k, r; + + /* if (CaseFoldInited == 0) init_case_fold_table(); */ + + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { + p11 = &CaseUnfold_11[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = (*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg); + if (r != 0) return r; + } + } + } + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + code = 0x0131; + r = (*f)(0x0049, &code, 1, arg); + if (r != 0) return r; + code = 0x0049; + r = (*f)(0x0131, &code, 1, arg); + if (r != 0) return r; + + code = 0x0130; + r = (*f)(0x0069, &code, 1, arg); + if (r != 0) return r; + code = 0x0069; + r = (*f)(0x0130, &code, 1, arg); + if (r != 0) return r; + } + else { +#endif + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); + i++) { + p11 = &CaseUnfold_11_Locale[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = (*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), + 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { + for (j = 0; j < CaseUnfold_12[i].to.n; j++) { + r = (*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) { +#endif + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); + i++) { + for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) { + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { + for (j = 0; j < CaseUnfold_13[i].to.n; j++) { + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_13[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + } + + return 0; +} + +extern int +onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, + OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + int n, i, j, k, len; + OnigCodePoint code, codes[3]; + CodePointList3 *to, *z3; + CodePointList2 *z2; + + if (CaseFoldInited == 0) init_case_fold_table(); + + n = 0; + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p); + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (code == 0x0049) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0131; + return 1; + } + else if (code == 0x0130) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0069; + return 1; + } + else if (code == 0x0131) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0049; + return 1; + } + else if (code == 0x0069) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0130; + return 1; + } + } +#endif + + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + OnigCodePoint orig_code = code; + + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = to->code[0]; + n++; + + code = to->code[0]; + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + if (to->code[i] != orig_code) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + } + else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + OnigCodePoint cs[3][4]; + int fn, ncs[3]; + + for (fn = 0; fn < to->n; fn++) { + cs[fn][0] = to->code[fn]; + if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0], + (void* )&z3) != 0) { + for (i = 0; i < z3->n; i++) { + cs[fn][i+1] = z3->code[i]; + } + ncs[fn] = z3->n + 1; + } + else + ncs[fn] = 1; + } + + if (fn == 2) { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + items[n].byte_len = len; + items[n].code_len = 2; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + n++; + } + } + + if (onig_st_lookup(Unfold2Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] == code) continue; + + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + else { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + for (k = 0; k < ncs[2]; k++) { + items[n].byte_len = len; + items[n].code_len = 3; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + items[n].code[2] = cs[2][k]; + n++; + } + } + } + + if (onig_st_lookup(Unfold3Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] == code) continue; + + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + + /* multi char folded code is not head of another folded multi char */ + flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */ + } + } + else { + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + + + if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + p += len; + if (p < end) { + int clen; + + codes[0] = code; + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[1] = to->code[0]; + } + else + codes[1] = code; + + clen = enclen(enc, p); + len += clen; + if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + + p += clen; + if (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[2] = to->code[0]; + } + else + codes[2] = code; + + clen = enclen(enc, p); + len += clen; + if (onig_st_lookup(Unfold3Table, (st_data_t )codes, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + } + } + + return n; } diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c index 6ab80a6c1c..1e909ebbf2 100755 --- a/ext/mbstring/oniguruma/enc/utf16_be.c +++ b/ext/mbstring/oniguruma/enc/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,9 +29,6 @@ #include "regenc.h" -#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) -#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) - static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -64,7 +61,11 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end) if (*(p+1) == 0x0a && *p == 0x00) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if ((*(p+1) == 0x0d || *(p+1) == 0x85) && *p == 0x00) + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *(p+1) == 0x0d || +#endif + *(p+1) == 0x85) && *p == 0x00) return 1; if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28)) return 1; @@ -74,7 +75,7 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end) } static OnigCodePoint -utf16be_mbc_to_code(const UChar* p, const UChar* end) +utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) { OnigCodePoint code; @@ -103,11 +104,11 @@ utf16be_code_to_mbc(OnigCodePoint code, UChar *buf) if (code > 0xffff) { unsigned int plane, high; - plane = code >> 16; + plane = (code >> 16) - 1; *p++ = (plane >> 2) + 0xd8; high = (code & 0xff00) >> 8; *p++ = ((plane & 0x03) << 6) + (high >> 2); - *p++ = (high & 0x02) + 0xdc; + *p++ = (high & 0x03) + 0xdc; *p = (UChar )(code & 0xff); return 4; } @@ -119,43 +120,37 @@ utf16be_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) +utf16be_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) { const UChar* p = *pp; - if (*p == 0) { + if (ONIGENC_IS_ASCII_CODE(*(p+1)) && *p == 0) { p++; - *lower++ = '\0'; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - - (*pp) += 2; - return 2; /* return byte length of converted char to lower */ - } - else { - int len; - len = EncLen_UTF16[*p]; - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x01; + *fold = 0x31; + (*pp) += 2; + return 2; } } - (*pp) += len; - return len; /* return byte length of converted char to lower */ +#endif + + *fold++ = 0; + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *pp += 2; + return 2; } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE, flag, + pp, end, fold); } +#if 0 static int -utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; @@ -165,27 +160,27 @@ utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) int c, v; p++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; } + return (v != 0 ? TRUE : FALSE); } return FALSE; } +#endif static UChar* utf16be_left_adjust_char_head(const UChar* start, const UChar* s) @@ -202,31 +197,29 @@ utf16be_left_adjust_char_head(const UChar* start, const UChar* s) return (UChar* )s; } +static int +utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE, + flag, p, end, items); +} + OnigEncodingType OnigEncodingUTF16_BE = { utf16be_mbc_enc_len, "UTF-16BE", /* name */ 4, /* max byte length */ 2, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, utf16be_is_mbc_newline, utf16be_mbc_to_code, utf16be_code_to_mbclen, utf16be_code_to_mbc, - utf16be_mbc_to_normalize, - utf16be_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, + utf16be_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf16be_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, onigenc_unicode_is_code_ctype, - onigenc_unicode_get_ctype_code_range, + onigenc_utf16_32_get_ctype_code_range, utf16be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match }; diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c index 2248e4910f..5cc0759117 100755 --- a/ext/mbstring/oniguruma/enc/utf16_le.c +++ b/ext/mbstring/oniguruma/enc/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,9 +29,6 @@ #include "regenc.h" -#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) -#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) - static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -70,7 +67,11 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end) if (*p == 0x0a && *(p+1) == 0x00) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00) + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *p == 0x0d || +#endif + *p == 0x85) && *(p+1) == 0x00) return 1; if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) return 1; @@ -80,7 +81,7 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end) } static OnigCodePoint -utf16le_mbc_to_code(const UChar* p, const UChar* end) +utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) { OnigCodePoint code; UChar c0 = *p; @@ -105,13 +106,13 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) if (code > 0xffff) { unsigned int plane, high; - plane = code >> 16; + plane = (code >> 16) - 1; high = (code & 0xff00) >> 8; *p++ = ((plane & 0x03) << 6) + (high >> 2); *p++ = (plane >> 2) + 0xd8; *p++ = (UChar )(code & 0xff); - *p = (high & 0x02) + 0xdc; + *p = (high & 0x03) + 0xdc; return 4; } else { @@ -122,40 +123,37 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) +utf16le_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) { const UChar* p = *pp; - if (*(p+1) == 0) { - *(lower+1) = '\0'; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp) += 2; - return 2; /* return byte length of converted char to lower */ - } - else { - int len = EncLen_UTF16[*(p+1)]; - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; + if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x31; + *fold = 0x01; + (*pp) += 2; + return 2; } } - (*pp) += len; - return len; /* return byte length of converted char to lower */ +#endif + + *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *fold = 0; + *pp += 2; + return 2; } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end, + fold); } +#if 0 static int -utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, + const UChar* end) { const UChar* p = *pp; @@ -164,26 +162,26 @@ utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) if (*(p+1) == 0) { int c, v; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); } return FALSE; } +#endif static UChar* utf16le_left_adjust_char_head(const UChar* start, const UChar* s) @@ -200,31 +198,29 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s) return (UChar* )s; } +static int +utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE, + flag, p, end, items); +} + OnigEncodingType OnigEncodingUTF16_LE = { utf16le_mbc_enc_len, "UTF-16LE", /* name */ 4, /* max byte length */ 2, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, utf16le_is_mbc_newline, utf16le_mbc_to_code, utf16le_code_to_mbclen, utf16le_code_to_mbc, - utf16le_mbc_to_normalize, - utf16le_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, + utf16le_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf16le_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, onigenc_unicode_is_code_ctype, - onigenc_unicode_get_ctype_code_range, + onigenc_utf16_32_get_ctype_code_range, utf16le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match }; diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c index 75133ca262..b4f822607c 100755 --- a/ext/mbstring/oniguruma/enc/utf32_be.c +++ b/ext/mbstring/oniguruma/enc/utf32_be.c @@ -2,7 +2,7 @@ utf32_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,7 +30,7 @@ #include "regenc.h" static int -utf32be_mbc_enc_len(const UChar* p) +utf32be_mbc_enc_len(const UChar* p ARG_UNUSED) { return 4; } @@ -42,7 +42,11 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end) if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if ((*(p+3) == 0x0d || *(p+3) == 0x85) + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *(p+3) == 0x0d || +#endif + *(p+3) == 0x85) && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) return 1; if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) @@ -54,13 +58,13 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end) } static OnigCodePoint -utf32be_mbc_to_code(const UChar* p, const UChar* end) +utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) { return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); } static int -utf32be_code_to_mbclen(OnigCodePoint code) +utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED) { return 4; } @@ -78,44 +82,39 @@ utf32be_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) +utf32be_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) { const UChar* p = *pp; - if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { - p += 3; - *lower++ = '\0'; - *lower++ = '\0'; - *lower++ = '\0'; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - - (*pp) += 4; - return 4; /* return byte length of converted char to lower */ - } - else { - int len = 4; - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; + if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) { + *fold++ = 0; + *fold++ = 0; + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*(p+3) == 0x49) { + *fold++ = 0x01; + *fold = 0x31; + (*pp) += 4; + return 4; } } - (*pp) += len; - return len; /* return byte length of converted char to lower */ +#endif + + *fold++ = 0; + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3)); + *pp += 4; + return 4; } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_BE, flag, pp, end, + fold); } +#if 0 static int -utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; @@ -125,26 +124,26 @@ utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) int c, v; p += 3; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; } + return (v != 0 ? TRUE : FALSE); } return FALSE; } +#endif static UChar* utf32be_left_adjust_char_head(const UChar* start, const UChar* s) @@ -157,31 +156,29 @@ utf32be_left_adjust_char_head(const UChar* start, const UChar* s) return (UChar* )(s - rem); } +static int +utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_BE, + flag, p, end, items); +} + OnigEncodingType OnigEncodingUTF32_BE = { utf32be_mbc_enc_len, "UTF-32BE", /* name */ 4, /* max byte length */ 4, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, utf32be_is_mbc_newline, utf32be_mbc_to_code, utf32be_code_to_mbclen, utf32be_code_to_mbc, - utf32be_mbc_to_normalize, - utf32be_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, + utf32be_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf32be_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, onigenc_unicode_is_code_ctype, - onigenc_unicode_get_ctype_code_range, + onigenc_utf16_32_get_ctype_code_range, utf32be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match }; diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c index 21dca10c11..8f413bfc74 100755 --- a/ext/mbstring/oniguruma/enc/utf32_le.c +++ b/ext/mbstring/oniguruma/enc/utf32_le.c @@ -2,7 +2,7 @@ utf32_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,7 +30,7 @@ #include "regenc.h" static int -utf32le_mbc_enc_len(const UChar* p) +utf32le_mbc_enc_len(const UChar* p ARG_UNUSED) { return 4; } @@ -42,8 +42,12 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end) if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00 - && (p+2) == 0x00 && *(p+3) == 0x00) + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *p == 0x0d || +#endif + *p == 0x85) + && *(p+1) == 0x00 && (p+2) == 0x00 && *(p+3) == 0x00) return 1; if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) && *(p+2) == 0x00 && *(p+3) == 0x00) @@ -54,13 +58,13 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end) } static OnigCodePoint -utf32le_mbc_to_code(const UChar* p, const UChar* end) +utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) { return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); } static int -utf32le_code_to_mbclen(OnigCodePoint code) +utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED) { return 4; } @@ -78,43 +82,40 @@ utf32le_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) +utf32le_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) { const UChar* p = *pp; - if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower++ = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); + if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x31; + *fold++ = 0x01; + } } else { - *lower++ = *p; +#endif + *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *fold++ = 0; +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI } - *lower++ = '\0'; - *lower++ = '\0'; - *lower = '\0'; +#endif - (*pp) += 4; - return 4; /* return byte length of converted char to lower */ - } - else { - int len = 4; - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - } - (*pp) += len; - return len; /* return byte length of converted char to lower */ + *fold++ = 0; + *fold = 0; + *pp += 4; + return 4; } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_LE, flag, pp, end, + fold); } +#if 0 static int -utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; @@ -123,26 +124,26 @@ utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { int c, v; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; } + return (v != 0 ? TRUE : FALSE); } return FALSE; } +#endif static UChar* utf32le_left_adjust_char_head(const UChar* start, const UChar* s) @@ -155,31 +156,29 @@ utf32le_left_adjust_char_head(const UChar* start, const UChar* s) return (UChar* )(s - rem); } +static int +utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_LE, + flag, p, end, items); +} + OnigEncodingType OnigEncodingUTF32_LE = { utf32le_mbc_enc_len, "UTF-32LE", /* name */ 4, /* max byte length */ 4, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, utf32le_is_mbc_newline, utf32le_mbc_to_code, utf32le_code_to_mbclen, utf32le_code_to_mbc, - utf32le_mbc_to_normalize, - utf32le_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, + utf32le_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf32le_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, onigenc_unicode_is_code_ctype, - onigenc_unicode_get_ctype_code_range, + onigenc_utf16_32_get_ctype_code_range, utf32le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match }; diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c index c7481d7050..5e2c1721aa 100644 --- a/ext/mbstring/oniguruma/enc/utf8.c +++ b/ext/mbstring/oniguruma/enc/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,19 +60,21 @@ static const int EncLen_UTF8[] = { }; static int -utf8_mbc_enc_len(const UChar* p) +mbc_enc_len(const UChar* p) { return EncLen_UTF8[*p]; } static int -utf8_is_mbc_newline(const UChar* p, const UChar* end) +is_mbc_newline(const UChar* p, const UChar* end) { if (p < end) { if (*p == 0x0a) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS +#ifndef USE_CRNL_AS_LINE_TERMINATOR if (*p == 0x0d) return 1; +#endif if (p + 1 < end) { if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ return 1; @@ -89,12 +91,12 @@ utf8_is_mbc_newline(const UChar* p, const UChar* end) } static OnigCodePoint -utf8_mbc_to_code(const UChar* p, const UChar* end) +mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) { int c, len; OnigCodePoint n; - len = enc_len(ONIG_ENCODING_UTF8, p); + len = enclen(ONIG_ENCODING_UTF8, p); c = *p++; if (len > 1) { len--; @@ -116,14 +118,10 @@ utf8_mbc_to_code(const UChar* p, const UChar* end) } static int -utf8_code_to_mbclen(OnigCodePoint code) +code_to_mbclen(OnigCodePoint code) { if ((code & 0xffffff80) == 0) return 1; - else if ((code & 0xfffff800) == 0) { - if (code <= 0xff && code >= 0xfe) - return 1; - return 2; - } + else if ((code & 0xfffff800) == 0) return 2; else if ((code & 0xffff0000) == 0) return 3; else if ((code & 0xffe00000) == 0) return 4; else if ((code & 0xfc000000) == 0) return 5; @@ -133,35 +131,11 @@ utf8_code_to_mbclen(OnigCodePoint code) else if (code == INVALID_CODE_FF) return 1; #endif else - return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; -} - -#if 0 -static int -utf8_code_to_mbc_first(OnigCodePoint code) -{ - if ((code & 0xffffff80) == 0) - return code; - else { - if ((code & 0xfffff800) == 0) - return ((code>>6)& 0x1f) | 0xc0; - else if ((code & 0xffff0000) == 0) - return ((code>>12) & 0x0f) | 0xe0; - else if ((code & 0xffe00000) == 0) - return ((code>>18) & 0x07) | 0xf0; - else if ((code & 0xfc000000) == 0) - return ((code>>24) & 0x03) | 0xf8; - else if ((code & 0x80000000) == 0) - return ((code>>30) & 0x01) | 0xfc; - else { - return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; - } - } + return ONIGERR_INVALID_CODE_POINT_VALUE; } -#endif static int -utf8_code_to_mbc(OnigCodePoint code, UChar *buf) +code_to_mbc(OnigCodePoint code, UChar *buf) { #define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80) #define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) @@ -209,7 +183,7 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf) } #endif else { - return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; } *p++ = UTF8_TRAIL0(code); @@ -218,76 +192,60 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower) +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end, UChar* fold) { const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - } - else { - *lower = *p; +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0xc4; + *fold = 0xb1; + (*pp)++; + return 2; + } } +#endif + + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); (*pp)++; return 1; /* return byte length of converted char to lower */ } else { - int len; - - if (*p == 195) { /* 195 == '\303' */ - int c = *(p + 1); - if (c >= 128) { - if (c <= (UChar )'\236' && /* upper */ - (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { - if (c != (UChar )'\227') { - *lower++ = *p; - *lower = (UChar )(c + 32); - (*pp) += 2; - return 2; - } - } - } - } - - len = enc_len(ONIG_ENCODING_UTF8, p); - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - } - (*pp) += len; - return len; /* return byte length of converted char to lower */ + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag, + pp, end, fold); } } +#if 0 static int -utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { (*pp)++; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); - } + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); } else { - (*pp) += enc_len(ONIG_ENCODING_UTF8, p); + (*pp) += enclen(ONIG_ENCODING_UTF8, p); - if (*p == 195) { /* 195 == '\303' */ + if (*p == 0xc3) { int c = *(p + 1); - if (c >= 128) { - if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { - if (c <= (UChar )'\236') { /* upper */ - if (c == (UChar )'\227') return FALSE; - return TRUE; - } - else if (c >= (UChar )'\240' && c <= (UChar )'\276') { /* lower */ - if (c == (UChar )'\267') return FALSE; - return TRUE; - } + if (c >= 0x80) { + if (c <= (UChar )0x9e) { /* upper */ + if (c == (UChar )0x97) return FALSE; + return TRUE; + } + else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */ + if (c == (UChar )'\267') return FALSE; + return TRUE; + } + else if (c == (UChar )0x9f && + (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; } } } @@ -295,3401 +253,20 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) return FALSE; } - - -static const OnigCodePoint EmptyRange[] = { 0 }; - -static const OnigCodePoint SBAlnum[] = { - 3, - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x0061, 0x007a -}; - -static const OnigCodePoint MBAlnum[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 411, -#else - 6, -#endif - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bef, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f29, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1369, 0x1371, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBAlnum */ - -static const OnigCodePoint SBAlpha[] = { - 2, - 0x0041, 0x005a, - 0x0061, 0x007a -}; - -static const OnigCodePoint MBAlpha[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 394, -#else - 6, -#endif - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09f0, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a70, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x180b, 0x180d, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x10400, 0x1049d, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBAlpha */ - -static const OnigCodePoint SBBlank[] = { - 2, - 0x0009, 0x0009, - 0x0020, 0x0020 -}; - -static const OnigCodePoint MBBlank[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 7, -#else - 1, -#endif - 0x00a0, 0x00a0 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBBlank */ - -static const OnigCodePoint SBCntrl[] = { - 2, - 0x0000, 0x001f, - 0x007f, 0x007f -}; - -static const OnigCodePoint MBCntrl[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 18, -#else - 2, -#endif - 0x0080, 0x009f, - 0x00ad, 0x00ad -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0600, 0x0603, - 0x06dd, 0x06dd, - 0x070f, 0x070f, - 0x17b4, 0x17b5, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x2063, - 0x206a, 0x206f, - 0xd800, 0xf8ff, - 0xfeff, 0xfeff, - 0xfff9, 0xfffb, - 0x1d173, 0x1d17a, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBCntrl */ - -static const OnigCodePoint SBDigit[] = { - 1, - 0x0030, 0x0039 -}; - -static const OnigCodePoint MBDigit[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 22, -#else - 0 -#endif -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be7, 0x0bef, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f29, - 0x1040, 0x1049, - 0x1369, 0x1371, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x1d7ce, 0x1d7ff -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBDigit */ - -static const OnigCodePoint SBGraph[] = { - 1, - 0x0021, 0x007e -}; - -static const OnigCodePoint MBGraph[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 404, -#else - 1, -#endif - 0x00a1, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x0357, - 0x035d, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03fb, - 0x0400, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060c, 0x0615, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x10fb, 0x10fb, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1361, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1681, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x19e0, 0x19ff, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x200b, 0x2027, - 0x202a, 0x202e, - 0x2030, 0x2054, - 0x2057, 0x2057, - 0x2060, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x20a0, 0x20b1, - 0x20d0, 0x20ea, - 0x2100, 0x213b, - 0x213d, 0x214b, - 0x2153, 0x2183, - 0x2190, 0x23d0, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x2617, - 0x2619, 0x267d, - 0x2680, 0x2691, - 0x26a0, 0x26a1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27d0, 0x27eb, - 0x27f0, 0x2b0d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3001, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x327d, - 0x327f, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fa5, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1013f, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x1039f, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBGraph */ - -static const OnigCodePoint SBLower[] = { - 1, - 0x0061, 0x007a -}; - -static const OnigCodePoint MBLower[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 423, -#else - 5, -#endif - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00df, 0x00f6, - 0x00f8, 0x00ff -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0101, 0x0101, - 0x0103, 0x0103, - 0x0105, 0x0105, - 0x0107, 0x0107, - 0x0109, 0x0109, - 0x010b, 0x010b, - 0x010d, 0x010d, - 0x010f, 0x010f, - 0x0111, 0x0111, - 0x0113, 0x0113, - 0x0115, 0x0115, - 0x0117, 0x0117, - 0x0119, 0x0119, - 0x011b, 0x011b, - 0x011d, 0x011d, - 0x011f, 0x011f, - 0x0121, 0x0121, - 0x0123, 0x0123, - 0x0125, 0x0125, - 0x0127, 0x0127, - 0x0129, 0x0129, - 0x012b, 0x012b, - 0x012d, 0x012d, - 0x012f, 0x012f, - 0x0131, 0x0131, - 0x0133, 0x0133, - 0x0135, 0x0135, - 0x0137, 0x0138, - 0x013a, 0x013a, - 0x013c, 0x013c, - 0x013e, 0x013e, - 0x0140, 0x0140, - 0x0142, 0x0142, - 0x0144, 0x0144, - 0x0146, 0x0146, - 0x0148, 0x0149, - 0x014b, 0x014b, - 0x014d, 0x014d, - 0x014f, 0x014f, - 0x0151, 0x0151, - 0x0153, 0x0153, - 0x0155, 0x0155, - 0x0157, 0x0157, - 0x0159, 0x0159, - 0x015b, 0x015b, - 0x015d, 0x015d, - 0x015f, 0x015f, - 0x0161, 0x0161, - 0x0163, 0x0163, - 0x0165, 0x0165, - 0x0167, 0x0167, - 0x0169, 0x0169, - 0x016b, 0x016b, - 0x016d, 0x016d, - 0x016f, 0x016f, - 0x0171, 0x0171, - 0x0173, 0x0173, - 0x0175, 0x0175, - 0x0177, 0x0177, - 0x017a, 0x017a, - 0x017c, 0x017c, - 0x017e, 0x0180, - 0x0183, 0x0183, - 0x0185, 0x0185, - 0x0188, 0x0188, - 0x018c, 0x018d, - 0x0192, 0x0192, - 0x0195, 0x0195, - 0x0199, 0x019b, - 0x019e, 0x019e, - 0x01a1, 0x01a1, - 0x01a3, 0x01a3, - 0x01a5, 0x01a5, - 0x01a8, 0x01a8, - 0x01aa, 0x01ab, - 0x01ad, 0x01ad, - 0x01b0, 0x01b0, - 0x01b4, 0x01b4, - 0x01b6, 0x01b6, - 0x01b9, 0x01ba, - 0x01bd, 0x01bf, - 0x01c6, 0x01c6, - 0x01c9, 0x01c9, - 0x01cc, 0x01cc, - 0x01ce, 0x01ce, - 0x01d0, 0x01d0, - 0x01d2, 0x01d2, - 0x01d4, 0x01d4, - 0x01d6, 0x01d6, - 0x01d8, 0x01d8, - 0x01da, 0x01da, - 0x01dc, 0x01dd, - 0x01df, 0x01df, - 0x01e1, 0x01e1, - 0x01e3, 0x01e3, - 0x01e5, 0x01e5, - 0x01e7, 0x01e7, - 0x01e9, 0x01e9, - 0x01eb, 0x01eb, - 0x01ed, 0x01ed, - 0x01ef, 0x01f0, - 0x01f3, 0x01f3, - 0x01f5, 0x01f5, - 0x01f9, 0x01f9, - 0x01fb, 0x01fb, - 0x01fd, 0x01fd, - 0x01ff, 0x01ff, - 0x0201, 0x0201, - 0x0203, 0x0203, - 0x0205, 0x0205, - 0x0207, 0x0207, - 0x0209, 0x0209, - 0x020b, 0x020b, - 0x020d, 0x020d, - 0x020f, 0x020f, - 0x0211, 0x0211, - 0x0213, 0x0213, - 0x0215, 0x0215, - 0x0217, 0x0217, - 0x0219, 0x0219, - 0x021b, 0x021b, - 0x021d, 0x021d, - 0x021f, 0x021f, - 0x0221, 0x0221, - 0x0223, 0x0223, - 0x0225, 0x0225, - 0x0227, 0x0227, - 0x0229, 0x0229, - 0x022b, 0x022b, - 0x022d, 0x022d, - 0x022f, 0x022f, - 0x0231, 0x0231, - 0x0233, 0x0236, - 0x0250, 0x02af, - 0x0390, 0x0390, - 0x03ac, 0x03ce, - 0x03d0, 0x03d1, - 0x03d5, 0x03d7, - 0x03d9, 0x03d9, - 0x03db, 0x03db, - 0x03dd, 0x03dd, - 0x03df, 0x03df, - 0x03e1, 0x03e1, - 0x03e3, 0x03e3, - 0x03e5, 0x03e5, - 0x03e7, 0x03e7, - 0x03e9, 0x03e9, - 0x03eb, 0x03eb, - 0x03ed, 0x03ed, - 0x03ef, 0x03f3, - 0x03f5, 0x03f5, - 0x03f8, 0x03f8, - 0x03fb, 0x03fb, - 0x0430, 0x045f, - 0x0461, 0x0461, - 0x0463, 0x0463, - 0x0465, 0x0465, - 0x0467, 0x0467, - 0x0469, 0x0469, - 0x046b, 0x046b, - 0x046d, 0x046d, - 0x046f, 0x046f, - 0x0471, 0x0471, - 0x0473, 0x0473, - 0x0475, 0x0475, - 0x0477, 0x0477, - 0x0479, 0x0479, - 0x047b, 0x047b, - 0x047d, 0x047d, - 0x047f, 0x047f, - 0x0481, 0x0481, - 0x048b, 0x048b, - 0x048d, 0x048d, - 0x048f, 0x048f, - 0x0491, 0x0491, - 0x0493, 0x0493, - 0x0495, 0x0495, - 0x0497, 0x0497, - 0x0499, 0x0499, - 0x049b, 0x049b, - 0x049d, 0x049d, - 0x049f, 0x049f, - 0x04a1, 0x04a1, - 0x04a3, 0x04a3, - 0x04a5, 0x04a5, - 0x04a7, 0x04a7, - 0x04a9, 0x04a9, - 0x04ab, 0x04ab, - 0x04ad, 0x04ad, - 0x04af, 0x04af, - 0x04b1, 0x04b1, - 0x04b3, 0x04b3, - 0x04b5, 0x04b5, - 0x04b7, 0x04b7, - 0x04b9, 0x04b9, - 0x04bb, 0x04bb, - 0x04bd, 0x04bd, - 0x04bf, 0x04bf, - 0x04c2, 0x04c2, - 0x04c4, 0x04c4, - 0x04c6, 0x04c6, - 0x04c8, 0x04c8, - 0x04ca, 0x04ca, - 0x04cc, 0x04cc, - 0x04ce, 0x04ce, - 0x04d1, 0x04d1, - 0x04d3, 0x04d3, - 0x04d5, 0x04d5, - 0x04d7, 0x04d7, - 0x04d9, 0x04d9, - 0x04db, 0x04db, - 0x04dd, 0x04dd, - 0x04df, 0x04df, - 0x04e1, 0x04e1, - 0x04e3, 0x04e3, - 0x04e5, 0x04e5, - 0x04e7, 0x04e7, - 0x04e9, 0x04e9, - 0x04eb, 0x04eb, - 0x04ed, 0x04ed, - 0x04ef, 0x04ef, - 0x04f1, 0x04f1, - 0x04f3, 0x04f3, - 0x04f5, 0x04f5, - 0x04f9, 0x04f9, - 0x0501, 0x0501, - 0x0503, 0x0503, - 0x0505, 0x0505, - 0x0507, 0x0507, - 0x0509, 0x0509, - 0x050b, 0x050b, - 0x050d, 0x050d, - 0x050f, 0x050f, - 0x0561, 0x0587, - 0x1d00, 0x1d2b, - 0x1d62, 0x1d6b, - 0x1e01, 0x1e01, - 0x1e03, 0x1e03, - 0x1e05, 0x1e05, - 0x1e07, 0x1e07, - 0x1e09, 0x1e09, - 0x1e0b, 0x1e0b, - 0x1e0d, 0x1e0d, - 0x1e0f, 0x1e0f, - 0x1e11, 0x1e11, - 0x1e13, 0x1e13, - 0x1e15, 0x1e15, - 0x1e17, 0x1e17, - 0x1e19, 0x1e19, - 0x1e1b, 0x1e1b, - 0x1e1d, 0x1e1d, - 0x1e1f, 0x1e1f, - 0x1e21, 0x1e21, - 0x1e23, 0x1e23, - 0x1e25, 0x1e25, - 0x1e27, 0x1e27, - 0x1e29, 0x1e29, - 0x1e2b, 0x1e2b, - 0x1e2d, 0x1e2d, - 0x1e2f, 0x1e2f, - 0x1e31, 0x1e31, - 0x1e33, 0x1e33, - 0x1e35, 0x1e35, - 0x1e37, 0x1e37, - 0x1e39, 0x1e39, - 0x1e3b, 0x1e3b, - 0x1e3d, 0x1e3d, - 0x1e3f, 0x1e3f, - 0x1e41, 0x1e41, - 0x1e43, 0x1e43, - 0x1e45, 0x1e45, - 0x1e47, 0x1e47, - 0x1e49, 0x1e49, - 0x1e4b, 0x1e4b, - 0x1e4d, 0x1e4d, - 0x1e4f, 0x1e4f, - 0x1e51, 0x1e51, - 0x1e53, 0x1e53, - 0x1e55, 0x1e55, - 0x1e57, 0x1e57, - 0x1e59, 0x1e59, - 0x1e5b, 0x1e5b, - 0x1e5d, 0x1e5d, - 0x1e5f, 0x1e5f, - 0x1e61, 0x1e61, - 0x1e63, 0x1e63, - 0x1e65, 0x1e65, - 0x1e67, 0x1e67, - 0x1e69, 0x1e69, - 0x1e6b, 0x1e6b, - 0x1e6d, 0x1e6d, - 0x1e6f, 0x1e6f, - 0x1e71, 0x1e71, - 0x1e73, 0x1e73, - 0x1e75, 0x1e75, - 0x1e77, 0x1e77, - 0x1e79, 0x1e79, - 0x1e7b, 0x1e7b, - 0x1e7d, 0x1e7d, - 0x1e7f, 0x1e7f, - 0x1e81, 0x1e81, - 0x1e83, 0x1e83, - 0x1e85, 0x1e85, - 0x1e87, 0x1e87, - 0x1e89, 0x1e89, - 0x1e8b, 0x1e8b, - 0x1e8d, 0x1e8d, - 0x1e8f, 0x1e8f, - 0x1e91, 0x1e91, - 0x1e93, 0x1e93, - 0x1e95, 0x1e9b, - 0x1ea1, 0x1ea1, - 0x1ea3, 0x1ea3, - 0x1ea5, 0x1ea5, - 0x1ea7, 0x1ea7, - 0x1ea9, 0x1ea9, - 0x1eab, 0x1eab, - 0x1ead, 0x1ead, - 0x1eaf, 0x1eaf, - 0x1eb1, 0x1eb1, - 0x1eb3, 0x1eb3, - 0x1eb5, 0x1eb5, - 0x1eb7, 0x1eb7, - 0x1eb9, 0x1eb9, - 0x1ebb, 0x1ebb, - 0x1ebd, 0x1ebd, - 0x1ebf, 0x1ebf, - 0x1ec1, 0x1ec1, - 0x1ec3, 0x1ec3, - 0x1ec5, 0x1ec5, - 0x1ec7, 0x1ec7, - 0x1ec9, 0x1ec9, - 0x1ecb, 0x1ecb, - 0x1ecd, 0x1ecd, - 0x1ecf, 0x1ecf, - 0x1ed1, 0x1ed1, - 0x1ed3, 0x1ed3, - 0x1ed5, 0x1ed5, - 0x1ed7, 0x1ed7, - 0x1ed9, 0x1ed9, - 0x1edb, 0x1edb, - 0x1edd, 0x1edd, - 0x1edf, 0x1edf, - 0x1ee1, 0x1ee1, - 0x1ee3, 0x1ee3, - 0x1ee5, 0x1ee5, - 0x1ee7, 0x1ee7, - 0x1ee9, 0x1ee9, - 0x1eeb, 0x1eeb, - 0x1eed, 0x1eed, - 0x1eef, 0x1eef, - 0x1ef1, 0x1ef1, - 0x1ef3, 0x1ef3, - 0x1ef5, 0x1ef5, - 0x1ef7, 0x1ef7, - 0x1ef9, 0x1ef9, - 0x1f00, 0x1f07, - 0x1f10, 0x1f15, - 0x1f20, 0x1f27, - 0x1f30, 0x1f37, - 0x1f40, 0x1f45, - 0x1f50, 0x1f57, - 0x1f60, 0x1f67, - 0x1f70, 0x1f7d, - 0x1f80, 0x1f87, - 0x1f90, 0x1f97, - 0x1fa0, 0x1fa7, - 0x1fb0, 0x1fb4, - 0x1fb6, 0x1fb7, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fc7, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fd7, - 0x1fe0, 0x1fe7, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ff7, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x210a, 0x210a, - 0x210e, 0x210f, - 0x2113, 0x2113, - 0x212f, 0x212f, - 0x2134, 0x2134, - 0x2139, 0x2139, - 0x213d, 0x213d, - 0x2146, 0x2149, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xff41, 0xff5a, - 0x10428, 0x1044f, - 0x1d41a, 0x1d433, - 0x1d44e, 0x1d454, - 0x1d456, 0x1d467, - 0x1d482, 0x1d49b, - 0x1d4b6, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d4cf, - 0x1d4ea, 0x1d503, - 0x1d51e, 0x1d537, - 0x1d552, 0x1d56b, - 0x1d586, 0x1d59f, - 0x1d5ba, 0x1d5d3, - 0x1d5ee, 0x1d607, - 0x1d622, 0x1d63b, - 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a3, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6e1, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d71b, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d755, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d78f, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBLower */ - -static const OnigCodePoint SBPrint[] = { - 2, - 0x0009, 0x000d, - 0x0020, 0x007e -}; - -static const OnigCodePoint MBPrint[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 403, -#else - 2, #endif - 0x0085, 0x0085, - 0x00a0, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x0357, - 0x035d, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03fb, - 0x0400, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060c, 0x0615, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x10fb, 0x10fb, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1361, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1680, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x19e0, 0x19ff, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2000, 0x2054, - 0x2057, 0x2057, - 0x205f, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x20a0, 0x20b1, - 0x20d0, 0x20ea, - 0x2100, 0x213b, - 0x213d, 0x214b, - 0x2153, 0x2183, - 0x2190, 0x23d0, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x2617, - 0x2619, 0x267d, - 0x2680, 0x2691, - 0x26a0, 0x26a1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27d0, 0x27eb, - 0x27f0, 0x2b0d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3000, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x327d, - 0x327f, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fa5, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1013f, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x1039f, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBPrint */ - -static const OnigCodePoint SBPunct[] = { - 9, - 0x0021, 0x0023, - 0x0025, 0x002a, - 0x002c, 0x002f, - 0x003a, 0x003b, - 0x003f, 0x0040, - 0x005b, 0x005d, - 0x005f, 0x005f, - 0x007b, 0x007b, - 0x007d, 0x007d -}; /* end of SBPunct */ - -static const OnigCodePoint MBPunct[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 77, -#else - 5, -#endif - 0x00a1, 0x00a1, - 0x00ab, 0x00ab, - 0x00b7, 0x00b7, - 0x00bb, 0x00bb, - 0x00bf, 0x00bf -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x055a, 0x055f, - 0x0589, 0x058a, - 0x05be, 0x05be, - 0x05c0, 0x05c0, - 0x05c3, 0x05c3, - 0x05f3, 0x05f4, - 0x060c, 0x060d, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x066a, 0x066d, - 0x06d4, 0x06d4, - 0x0700, 0x070d, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0df4, 0x0df4, - 0x0e4f, 0x0e4f, - 0x0e5a, 0x0e5b, - 0x0f04, 0x0f12, - 0x0f3a, 0x0f3d, - 0x0f85, 0x0f85, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1361, 0x1368, - 0x166d, 0x166e, - 0x169b, 0x169c, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x180a, - 0x1944, 0x1945, - 0x2010, 0x2027, - 0x2030, 0x2043, - 0x2045, 0x2051, - 0x2053, 0x2054, - 0x2057, 0x2057, - 0x207d, 0x207e, - 0x208d, 0x208e, - 0x2329, 0x232a, - 0x23b4, 0x23b6, - 0x2768, 0x2775, - 0x27e6, 0x27eb, - 0x2983, 0x2998, - 0x29d8, 0x29db, - 0x29fc, 0x29fd, - 0x3001, 0x3003, - 0x3008, 0x3011, - 0x3014, 0x301f, - 0x3030, 0x3030, - 0x303d, 0x303d, - 0x30a0, 0x30a0, - 0x30fb, 0x30fb, - 0xfd3e, 0xfd3f, - 0xfe30, 0xfe52, - 0xfe54, 0xfe61, - 0xfe63, 0xfe63, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff0a, - 0xff0c, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3b, 0xff3d, - 0xff3f, 0xff3f, - 0xff5b, 0xff5b, - 0xff5d, 0xff5d, - 0xff5f, 0xff65, - 0x10100, 0x10101, - 0x1039f, 0x1039f -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBPunct */ - -static const OnigCodePoint SBSpace[] = { - 2, - 0x0009, 0x000d, - 0x0020, 0x0020 -}; - -static const OnigCodePoint MBSpace[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 9, -#else - 2, -#endif - 0x0085, 0x0085, - 0x00a0, 0x00a0 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBSpace */ - -static const OnigCodePoint SBUpper[] = { - 1, - 0x0041, 0x005a -}; - -static const OnigCodePoint MBUpper[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 420, -#else - 2, -#endif - 0x00c0, 0x00d6, - 0x00d8, 0x00de -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0100, 0x0100, - 0x0102, 0x0102, - 0x0104, 0x0104, - 0x0106, 0x0106, - 0x0108, 0x0108, - 0x010a, 0x010a, - 0x010c, 0x010c, - 0x010e, 0x010e, - 0x0110, 0x0110, - 0x0112, 0x0112, - 0x0114, 0x0114, - 0x0116, 0x0116, - 0x0118, 0x0118, - 0x011a, 0x011a, - 0x011c, 0x011c, - 0x011e, 0x011e, - 0x0120, 0x0120, - 0x0122, 0x0122, - 0x0124, 0x0124, - 0x0126, 0x0126, - 0x0128, 0x0128, - 0x012a, 0x012a, - 0x012c, 0x012c, - 0x012e, 0x012e, - 0x0130, 0x0130, - 0x0132, 0x0132, - 0x0134, 0x0134, - 0x0136, 0x0136, - 0x0139, 0x0139, - 0x013b, 0x013b, - 0x013d, 0x013d, - 0x013f, 0x013f, - 0x0141, 0x0141, - 0x0143, 0x0143, - 0x0145, 0x0145, - 0x0147, 0x0147, - 0x014a, 0x014a, - 0x014c, 0x014c, - 0x014e, 0x014e, - 0x0150, 0x0150, - 0x0152, 0x0152, - 0x0154, 0x0154, - 0x0156, 0x0156, - 0x0158, 0x0158, - 0x015a, 0x015a, - 0x015c, 0x015c, - 0x015e, 0x015e, - 0x0160, 0x0160, - 0x0162, 0x0162, - 0x0164, 0x0164, - 0x0166, 0x0166, - 0x0168, 0x0168, - 0x016a, 0x016a, - 0x016c, 0x016c, - 0x016e, 0x016e, - 0x0170, 0x0170, - 0x0172, 0x0172, - 0x0174, 0x0174, - 0x0176, 0x0176, - 0x0178, 0x0179, - 0x017b, 0x017b, - 0x017d, 0x017d, - 0x0181, 0x0182, - 0x0184, 0x0184, - 0x0186, 0x0187, - 0x0189, 0x018b, - 0x018e, 0x0191, - 0x0193, 0x0194, - 0x0196, 0x0198, - 0x019c, 0x019d, - 0x019f, 0x01a0, - 0x01a2, 0x01a2, - 0x01a4, 0x01a4, - 0x01a6, 0x01a7, - 0x01a9, 0x01a9, - 0x01ac, 0x01ac, - 0x01ae, 0x01af, - 0x01b1, 0x01b3, - 0x01b5, 0x01b5, - 0x01b7, 0x01b8, - 0x01bc, 0x01bc, - 0x01c4, 0x01c4, - 0x01c7, 0x01c7, - 0x01ca, 0x01ca, - 0x01cd, 0x01cd, - 0x01cf, 0x01cf, - 0x01d1, 0x01d1, - 0x01d3, 0x01d3, - 0x01d5, 0x01d5, - 0x01d7, 0x01d7, - 0x01d9, 0x01d9, - 0x01db, 0x01db, - 0x01de, 0x01de, - 0x01e0, 0x01e0, - 0x01e2, 0x01e2, - 0x01e4, 0x01e4, - 0x01e6, 0x01e6, - 0x01e8, 0x01e8, - 0x01ea, 0x01ea, - 0x01ec, 0x01ec, - 0x01ee, 0x01ee, - 0x01f1, 0x01f1, - 0x01f4, 0x01f4, - 0x01f6, 0x01f8, - 0x01fa, 0x01fa, - 0x01fc, 0x01fc, - 0x01fe, 0x01fe, - 0x0200, 0x0200, - 0x0202, 0x0202, - 0x0204, 0x0204, - 0x0206, 0x0206, - 0x0208, 0x0208, - 0x020a, 0x020a, - 0x020c, 0x020c, - 0x020e, 0x020e, - 0x0210, 0x0210, - 0x0212, 0x0212, - 0x0214, 0x0214, - 0x0216, 0x0216, - 0x0218, 0x0218, - 0x021a, 0x021a, - 0x021c, 0x021c, - 0x021e, 0x021e, - 0x0220, 0x0220, - 0x0222, 0x0222, - 0x0224, 0x0224, - 0x0226, 0x0226, - 0x0228, 0x0228, - 0x022a, 0x022a, - 0x022c, 0x022c, - 0x022e, 0x022e, - 0x0230, 0x0230, - 0x0232, 0x0232, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x038f, - 0x0391, 0x03a1, - 0x03a3, 0x03ab, - 0x03d2, 0x03d4, - 0x03d8, 0x03d8, - 0x03da, 0x03da, - 0x03dc, 0x03dc, - 0x03de, 0x03de, - 0x03e0, 0x03e0, - 0x03e2, 0x03e2, - 0x03e4, 0x03e4, - 0x03e6, 0x03e6, - 0x03e8, 0x03e8, - 0x03ea, 0x03ea, - 0x03ec, 0x03ec, - 0x03ee, 0x03ee, - 0x03f4, 0x03f4, - 0x03f7, 0x03f7, - 0x03f9, 0x03fa, - 0x0400, 0x042f, - 0x0460, 0x0460, - 0x0462, 0x0462, - 0x0464, 0x0464, - 0x0466, 0x0466, - 0x0468, 0x0468, - 0x046a, 0x046a, - 0x046c, 0x046c, - 0x046e, 0x046e, - 0x0470, 0x0470, - 0x0472, 0x0472, - 0x0474, 0x0474, - 0x0476, 0x0476, - 0x0478, 0x0478, - 0x047a, 0x047a, - 0x047c, 0x047c, - 0x047e, 0x047e, - 0x0480, 0x0480, - 0x048a, 0x048a, - 0x048c, 0x048c, - 0x048e, 0x048e, - 0x0490, 0x0490, - 0x0492, 0x0492, - 0x0494, 0x0494, - 0x0496, 0x0496, - 0x0498, 0x0498, - 0x049a, 0x049a, - 0x049c, 0x049c, - 0x049e, 0x049e, - 0x04a0, 0x04a0, - 0x04a2, 0x04a2, - 0x04a4, 0x04a4, - 0x04a6, 0x04a6, - 0x04a8, 0x04a8, - 0x04aa, 0x04aa, - 0x04ac, 0x04ac, - 0x04ae, 0x04ae, - 0x04b0, 0x04b0, - 0x04b2, 0x04b2, - 0x04b4, 0x04b4, - 0x04b6, 0x04b6, - 0x04b8, 0x04b8, - 0x04ba, 0x04ba, - 0x04bc, 0x04bc, - 0x04be, 0x04be, - 0x04c0, 0x04c1, - 0x04c3, 0x04c3, - 0x04c5, 0x04c5, - 0x04c7, 0x04c7, - 0x04c9, 0x04c9, - 0x04cb, 0x04cb, - 0x04cd, 0x04cd, - 0x04d0, 0x04d0, - 0x04d2, 0x04d2, - 0x04d4, 0x04d4, - 0x04d6, 0x04d6, - 0x04d8, 0x04d8, - 0x04da, 0x04da, - 0x04dc, 0x04dc, - 0x04de, 0x04de, - 0x04e0, 0x04e0, - 0x04e2, 0x04e2, - 0x04e4, 0x04e4, - 0x04e6, 0x04e6, - 0x04e8, 0x04e8, - 0x04ea, 0x04ea, - 0x04ec, 0x04ec, - 0x04ee, 0x04ee, - 0x04f0, 0x04f0, - 0x04f2, 0x04f2, - 0x04f4, 0x04f4, - 0x04f8, 0x04f8, - 0x0500, 0x0500, - 0x0502, 0x0502, - 0x0504, 0x0504, - 0x0506, 0x0506, - 0x0508, 0x0508, - 0x050a, 0x050a, - 0x050c, 0x050c, - 0x050e, 0x050e, - 0x0531, 0x0556, - 0x10a0, 0x10c5, - 0x1e00, 0x1e00, - 0x1e02, 0x1e02, - 0x1e04, 0x1e04, - 0x1e06, 0x1e06, - 0x1e08, 0x1e08, - 0x1e0a, 0x1e0a, - 0x1e0c, 0x1e0c, - 0x1e0e, 0x1e0e, - 0x1e10, 0x1e10, - 0x1e12, 0x1e12, - 0x1e14, 0x1e14, - 0x1e16, 0x1e16, - 0x1e18, 0x1e18, - 0x1e1a, 0x1e1a, - 0x1e1c, 0x1e1c, - 0x1e1e, 0x1e1e, - 0x1e20, 0x1e20, - 0x1e22, 0x1e22, - 0x1e24, 0x1e24, - 0x1e26, 0x1e26, - 0x1e28, 0x1e28, - 0x1e2a, 0x1e2a, - 0x1e2c, 0x1e2c, - 0x1e2e, 0x1e2e, - 0x1e30, 0x1e30, - 0x1e32, 0x1e32, - 0x1e34, 0x1e34, - 0x1e36, 0x1e36, - 0x1e38, 0x1e38, - 0x1e3a, 0x1e3a, - 0x1e3c, 0x1e3c, - 0x1e3e, 0x1e3e, - 0x1e40, 0x1e40, - 0x1e42, 0x1e42, - 0x1e44, 0x1e44, - 0x1e46, 0x1e46, - 0x1e48, 0x1e48, - 0x1e4a, 0x1e4a, - 0x1e4c, 0x1e4c, - 0x1e4e, 0x1e4e, - 0x1e50, 0x1e50, - 0x1e52, 0x1e52, - 0x1e54, 0x1e54, - 0x1e56, 0x1e56, - 0x1e58, 0x1e58, - 0x1e5a, 0x1e5a, - 0x1e5c, 0x1e5c, - 0x1e5e, 0x1e5e, - 0x1e60, 0x1e60, - 0x1e62, 0x1e62, - 0x1e64, 0x1e64, - 0x1e66, 0x1e66, - 0x1e68, 0x1e68, - 0x1e6a, 0x1e6a, - 0x1e6c, 0x1e6c, - 0x1e6e, 0x1e6e, - 0x1e70, 0x1e70, - 0x1e72, 0x1e72, - 0x1e74, 0x1e74, - 0x1e76, 0x1e76, - 0x1e78, 0x1e78, - 0x1e7a, 0x1e7a, - 0x1e7c, 0x1e7c, - 0x1e7e, 0x1e7e, - 0x1e80, 0x1e80, - 0x1e82, 0x1e82, - 0x1e84, 0x1e84, - 0x1e86, 0x1e86, - 0x1e88, 0x1e88, - 0x1e8a, 0x1e8a, - 0x1e8c, 0x1e8c, - 0x1e8e, 0x1e8e, - 0x1e90, 0x1e90, - 0x1e92, 0x1e92, - 0x1e94, 0x1e94, - 0x1ea0, 0x1ea0, - 0x1ea2, 0x1ea2, - 0x1ea4, 0x1ea4, - 0x1ea6, 0x1ea6, - 0x1ea8, 0x1ea8, - 0x1eaa, 0x1eaa, - 0x1eac, 0x1eac, - 0x1eae, 0x1eae, - 0x1eb0, 0x1eb0, - 0x1eb2, 0x1eb2, - 0x1eb4, 0x1eb4, - 0x1eb6, 0x1eb6, - 0x1eb8, 0x1eb8, - 0x1eba, 0x1eba, - 0x1ebc, 0x1ebc, - 0x1ebe, 0x1ebe, - 0x1ec0, 0x1ec0, - 0x1ec2, 0x1ec2, - 0x1ec4, 0x1ec4, - 0x1ec6, 0x1ec6, - 0x1ec8, 0x1ec8, - 0x1eca, 0x1eca, - 0x1ecc, 0x1ecc, - 0x1ece, 0x1ece, - 0x1ed0, 0x1ed0, - 0x1ed2, 0x1ed2, - 0x1ed4, 0x1ed4, - 0x1ed6, 0x1ed6, - 0x1ed8, 0x1ed8, - 0x1eda, 0x1eda, - 0x1edc, 0x1edc, - 0x1ede, 0x1ede, - 0x1ee0, 0x1ee0, - 0x1ee2, 0x1ee2, - 0x1ee4, 0x1ee4, - 0x1ee6, 0x1ee6, - 0x1ee8, 0x1ee8, - 0x1eea, 0x1eea, - 0x1eec, 0x1eec, - 0x1eee, 0x1eee, - 0x1ef0, 0x1ef0, - 0x1ef2, 0x1ef2, - 0x1ef4, 0x1ef4, - 0x1ef6, 0x1ef6, - 0x1ef8, 0x1ef8, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 0x1f3f, - 0x1f48, 0x1f4d, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f5f, - 0x1f68, 0x1f6f, - 0x1fb8, 0x1fbb, - 0x1fc8, 0x1fcb, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210b, 0x210d, - 0x2110, 0x2112, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x2130, 0x2131, - 0x2133, 0x2133, - 0x213e, 0x213f, - 0x2145, 0x2145, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x1d400, 0x1d419, - 0x1d434, 0x1d44d, - 0x1d468, 0x1d481, - 0x1d49c, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b5, - 0x1d4d0, 0x1d4e9, - 0x1d504, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d538, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d56c, 0x1d585, - 0x1d5a0, 0x1d5b9, - 0x1d5d4, 0x1d5ed, - 0x1d608, 0x1d621, - 0x1d63c, 0x1d655, - 0x1d670, 0x1d689, - 0x1d6a8, 0x1d6c0, - 0x1d6e2, 0x1d6fa, - 0x1d71c, 0x1d734, - 0x1d756, 0x1d76e, - 0x1d790, 0x1d7a8 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBUpper */ - -static const OnigCodePoint SBXDigit[] = { - 3, - 0x0030, 0x0039, - 0x0041, 0x0046, - 0x0061, 0x0066 -}; - -static const OnigCodePoint SBASCII[] = { - 1, - 0x0000, 0x007f -}; - -static const OnigCodePoint SBWord[] = { - 4, - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x005f, 0x005f, - 0x0061, 0x007a -}; - -static const OnigCodePoint MBWord[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 432, -#else - 8, -#endif - 0x00aa, 0x00aa, - 0x00b2, 0x00b3, - 0x00b5, 0x00b5, - 0x00b9, 0x00ba, - 0x00bc, 0x00be, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, -#ifndef USE_UNICODE_FULL_RANGE_CTYPE - 0x00f8, 0x7fffffff -#else /* not USE_UNICODE_FULL_RANGE_CTYPE */ - 0x00f8, 0x0236, - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x09f4, 0x09f9, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bf2, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f33, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1369, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16ee, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x203f, 0x2040, - 0x2054, 0x2054, - 0x2070, 0x2071, - 0x2074, 0x2079, - 0x207f, 0x2089, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x2153, 0x2183, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x3005, 0x3007, - 0x3021, 0x302f, - 0x3031, 0x3035, - 0x3038, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3192, 0x3195, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3220, 0x3229, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe33, 0xfe34, - 0xfe4d, 0xfe4f, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff3f, 0xff3f, - 0xff41, 0xff5a, - 0xff65, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10107, 0x10133, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBWord */ static int -utf8_get_ctype_code_range(int ctype, - const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) +get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, + const OnigCodePoint* ranges[]) { -#define CR_SET(sbl,mbl) do { \ - *sbr = sbl; \ - *mbr = mbl; \ -} while (0) - -#define CR_SB_SET(sbl) do { \ - *sbr = sbl; \ - *mbr = EmptyRange; \ -} while (0) - - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - CR_SET(SBAlpha, MBAlpha); - break; - case ONIGENC_CTYPE_BLANK: - CR_SET(SBBlank, MBBlank); - break; - case ONIGENC_CTYPE_CNTRL: - CR_SET(SBCntrl, MBCntrl); - break; - case ONIGENC_CTYPE_DIGIT: - CR_SET(SBDigit, MBDigit); - break; - case ONIGENC_CTYPE_GRAPH: - CR_SET(SBGraph, MBGraph); - break; - case ONIGENC_CTYPE_LOWER: - CR_SET(SBLower, MBLower); - break; - case ONIGENC_CTYPE_PRINT: - CR_SET(SBPrint, MBPrint); - break; - case ONIGENC_CTYPE_PUNCT: - CR_SET(SBPunct, MBPunct); - break; - case ONIGENC_CTYPE_SPACE: - CR_SET(SBSpace, MBSpace); - break; - case ONIGENC_CTYPE_UPPER: - CR_SET(SBUpper, MBUpper); - break; - case ONIGENC_CTYPE_XDIGIT: - CR_SB_SET(SBXDigit); - break; - case ONIGENC_CTYPE_WORD: - CR_SET(SBWord, MBWord); - break; - case ONIGENC_CTYPE_ASCII: - CR_SB_SET(SBASCII); - break; - case ONIGENC_CTYPE_ALNUM: - CR_SET(SBAlnum, MBAlnum); - break; - - default: - return ONIGENCERR_TYPE_BUG; - break; - } - - return 0; + *sb_out = 0x80; + return onigenc_unicode_ctype_code_range(ctype, ranges); } -static int -utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - const OnigCodePoint *range; -#endif - - if (code < 256) { - return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); - } - -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - range = MBAlpha; - break; - case ONIGENC_CTYPE_BLANK: - range = MBBlank; - break; - case ONIGENC_CTYPE_CNTRL: - range = MBCntrl; - break; - case ONIGENC_CTYPE_DIGIT: - range = MBDigit; - break; - case ONIGENC_CTYPE_GRAPH: - range = MBGraph; - break; - case ONIGENC_CTYPE_LOWER: - range = MBLower; - break; - case ONIGENC_CTYPE_PRINT: - range = MBPrint; - break; - case ONIGENC_CTYPE_PUNCT: - range = MBPunct; - break; - case ONIGENC_CTYPE_SPACE: - range = MBSpace; - break; - case ONIGENC_CTYPE_UPPER: - range = MBUpper; - break; - case ONIGENC_CTYPE_XDIGIT: - return FALSE; - break; - case ONIGENC_CTYPE_WORD: - range = MBWord; - break; - case ONIGENC_CTYPE_ASCII: - return FALSE; - break; - case ONIGENC_CTYPE_ALNUM: - range = MBAlnum; - break; - case ONIGENC_CTYPE_NEWLINE: - return FALSE; - break; - - default: - return ONIGENCERR_TYPE_BUG; - break; - } - - return onig_is_in_code_range((UChar* )range, code); - -#else - - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { -#ifdef USE_INVALID_CODE_SCHEME - if (code <= VALID_CODE_LIMIT) -#endif - return TRUE; - } -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ - - return FALSE; -} static UChar* -utf8_left_adjust_char_head(const UChar* start, const UChar* s) +left_adjust_char_head(const UChar* start, const UChar* s) { const UChar *p; @@ -3700,31 +277,29 @@ utf8_left_adjust_char_head(const UChar* start, const UChar* s) return (UChar* )p; } +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8, + flag, p, end, items); +} + OnigEncodingType OnigEncodingUTF8 = { - utf8_mbc_enc_len, + mbc_enc_len, "UTF-8", /* name */ 6, /* max byte length */ 1, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - utf8_is_mbc_newline, - utf8_mbc_to_code, - utf8_code_to_mbclen, - utf8_code_to_mbc, - utf8_mbc_to_normalize, - utf8_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - utf8_is_code_ctype, - utf8_get_ctype_code_range, - utf8_left_adjust_char_head, + is_mbc_newline, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html index d55f1cc94f..af3426ce32 100755 --- a/ext/mbstring/oniguruma/index.html +++ b/ext/mbstring/oniguruma/index.html @@ -8,7 +8,7 @@

Oniguruma

(Japanese)

-(c) K.Kosako, updated at: 2007/08/16 +(c) K.Kosako, updated at: 2010/01/09

@@ -16,8 +16,8 @@
What's new
    +
  • 2010/01/09: Version 5.9.2 released.
  • 2007/08/16: Version 4.7.1 released.
  • -
  • 2007/07/14: Version 5.9.0 released.
  • 2007/06/20: Version 2.5.9 released.
  • 2007/06/20: Maintainer of 2.x was changed.
@@ -62,9 +62,8 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
Download:
- -
-Back to Home diff --git a/ext/mbstring/oniguruma/index_ja.html b/ext/mbstring/oniguruma/index_ja.html new file mode 100644 index 0000000000..e03b045b98 --- /dev/null +++ b/ext/mbstring/oniguruma/index_ja.html @@ -0,0 +1,190 @@ + + + + 鬼車 + + + +

鬼車

+ +

+(c) K.Kosako, 最終更新: 2010/01/09 +

+ +
+ +
更新情報 + +
    +
  • 2010/01/09: Version 5.9.2 リリース
  • +
  • 2007/08/16: Version 4.7.1 リリース
  • +
  • 2007/06/20: Version 2.5.9 リリース
  • +
  • 2007/06/20: 2.xの保守担当者を変更
  • +
+
+
+ +

+鬼車は正規表現ライブラリである。
+このライブラリの特徴は、それぞれの正規表現オブジェクトごとに異なる文字エンコーディングを +指定できること。
+(API: GNU regex, POSIX and Oniguruma native) +

+ +
+
対応している文字エンコーディング:
+ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
+EUC-JP, EUC-TW, EUC-KR, EUC-CN,
+Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
+ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
+ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
+ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+ +(GB18030は、KUBO Takehiro氏提供)
+(CP1251は、Byte氏提供) +
+

+
+ +
+ +
ライセンス:BSDライセンス + +
+
プラットフォーム: +
    +
  • Unix (Mac OS Xを含む) +
  • Cygwin +
  • Win32 +
+ +
+ +
ダウンロード: + + +
+ +2.xの保守担当は、Hannes Wyss <hwyss AT ywesee.com>に交替しました。
+2.xについては、彼に連絡してください。
+
+* 5.xはUnicode Property/Scriptを提供
+* 2.xはRuby1.6/1.8組込みライブラリとして動作する。 (2006年末で保守を終了)
+ +
+
ドキュメント: (version 5.9.2) + + +
+
サンプルプログラム: + + +
+
サイト: + + +
+
リンク: + + +
+
参考資料: + + +
+
+

+and I'm thankful to Akinori MUSHA. +

+ +
+
+
他のライブラリ: + +
+ +
+ホームにもどる + + diff --git a/ext/mbstring/oniguruma/onigposix.h b/ext/mbstring/oniguruma/onigposix.h index cfeb88a292..f1cb35fbd8 100644 --- a/ext/mbstring/oniguruma/onigposix.h +++ b/ext/mbstring/oniguruma/onigposix.h @@ -97,7 +97,7 @@ typedef struct { #ifndef ONIG_EXTERN #if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) || defined(RUBY_EXPORT) +#if defined(EXPORT) #define ONIG_EXTERN extern __declspec(dllexport) #else #define ONIG_EXTERN extern __declspec(dllimport) diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h index 5196a3d585..3b557639f1 100644 --- a/ext/mbstring/oniguruma/oniguruma.h +++ b/ext/mbstring/oniguruma/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako + * Copyright (c) 2002-2009 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,16 +29,14 @@ * SUCH DAMAGE. */ -#include "../php_onig_compat.h" - #ifdef __cplusplus extern "C" { #endif #define ONIGURUMA -#define ONIGURUMA_VERSION_MAJOR 4 -#define ONIGURUMA_VERSION_MINOR 7 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_MAJOR 5 +#define ONIGURUMA_VERSION_MINOR 9 +#define ONIGURUMA_VERSION_TEENY 2 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -56,6 +54,12 @@ extern "C" { # endif #endif +#ifdef HAVE_STDARG_H +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + #ifndef P_ #if defined(__STDC__) || defined(_WIN32) # define P_(args) args @@ -94,28 +98,27 @@ extern "C" { typedef unsigned char OnigUChar; typedef unsigned long OnigCodePoint; +typedef unsigned int OnigCtype; typedef unsigned int OnigDistance; #define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) -/* ambiguous match flag */ -typedef unsigned int OnigAmbigType; +typedef unsigned int OnigCaseFoldType; /* case fold flag */ -ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag; +ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; -#define ONIGENC_AMBIGUOUS_MATCH_NONE 0 -#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0) -#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1) +/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ +/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ +#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) +#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) -#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1) +#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR +#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag -#define ONIGENC_AMBIGUOUS_MATCH_FULL \ - ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ) -#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT OnigDefaultAmbigFlag - -#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3 -#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4 +#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3 +#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13 +/* 13 => Unicode:0x1ffc */ /* code range */ #define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0]) @@ -123,20 +126,10 @@ ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag; #define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2] typedef struct { - int len; - OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN]; -} OnigCompAmbigCodeItem; - -typedef struct { - int n; - OnigCodePoint code; - OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM]; -} OnigCompAmbigCodes; - -typedef struct { - OnigCodePoint from; - OnigCodePoint to; -} OnigPairAmbigCodes; + int byte_len; /* argument(original) character(s) byte length */ + int code_len; /* number of code */ + OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]; +} OnigCaseFoldCodeItem; typedef struct { OnigCodePoint esc; @@ -146,32 +139,24 @@ typedef struct { OnigCodePoint one_or_more_time; OnigCodePoint anychar_anytime; } OnigMetaCharTableType; + +typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); - -#if defined(RUBY_PLATFORM) && defined(M17N_H) - -#define ONIG_RUBY_M17N -typedef m17n_encoding* OnigEncoding; - -#else - -typedef struct { +typedef struct OnigEncodingTypeST { int (*mbc_enc_len)(const OnigUChar* p); const char* name; int max_enc_len; int min_enc_len; - OnigAmbigType support_ambig_flag; - OnigMetaCharTableType meta_char_table; int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end); OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end); int (*code_to_mbclen)(OnigCodePoint code); int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf); - int (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to); - int (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end); - int (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs); - int (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs); - int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype); - int (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]); + int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to); + int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg); + int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]); + int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end); + int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype); + int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]); OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p); int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end); } OnigEncodingType; @@ -206,6 +191,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN; ONIG_EXTERN OnigEncodingType OnigEncodingSJIS; ONIG_EXTERN OnigEncodingType OnigEncodingKOI8; ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R; +ONIG_EXTERN OnigEncodingType OnigEncodingCP1251; ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; @@ -237,136 +223,60 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; #define ONIG_ENCODING_SJIS (&OnigEncodingSJIS) #define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) #define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R) +#define ONIG_ENCODING_CP1251 (&OnigEncodingCP1251) #define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5) #define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030) -#endif /* else RUBY && M17N */ - #define ONIG_ENCODING_UNDEF ((OnigEncoding )0) /* work size */ -#define ONIGENC_CODE_TO_MBC_MAXLEN 7 -#define ONIGENC_MBC_NORMALIZE_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN +#define ONIGENC_CODE_TO_MBC_MAXLEN 7 +#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18 +/* 18: 6(max-byte) * 3(case-fold chars) */ /* character types */ -#define ONIGENC_CTYPE_NEWLINE (1<< 0) -#define ONIGENC_CTYPE_ALPHA (1<< 1) -#define ONIGENC_CTYPE_BLANK (1<< 2) -#define ONIGENC_CTYPE_CNTRL (1<< 3) -#define ONIGENC_CTYPE_DIGIT (1<< 4) -#define ONIGENC_CTYPE_GRAPH (1<< 5) -#define ONIGENC_CTYPE_LOWER (1<< 6) -#define ONIGENC_CTYPE_PRINT (1<< 7) -#define ONIGENC_CTYPE_PUNCT (1<< 8) -#define ONIGENC_CTYPE_SPACE (1<< 9) -#define ONIGENC_CTYPE_UPPER (1<<10) -#define ONIGENC_CTYPE_XDIGIT (1<<11) -#define ONIGENC_CTYPE_WORD (1<<12) -#define ONIGENC_CTYPE_ASCII (1<<13) -#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT) - -#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc, p) +#define ONIGENC_CTYPE_NEWLINE 0 +#define ONIGENC_CTYPE_ALPHA 1 +#define ONIGENC_CTYPE_BLANK 2 +#define ONIGENC_CTYPE_CNTRL 3 +#define ONIGENC_CTYPE_DIGIT 4 +#define ONIGENC_CTYPE_GRAPH 5 +#define ONIGENC_CTYPE_LOWER 6 +#define ONIGENC_CTYPE_PRINT 7 +#define ONIGENC_CTYPE_PUNCT 8 +#define ONIGENC_CTYPE_SPACE 9 +#define ONIGENC_CTYPE_UPPER 10 +#define ONIGENC_CTYPE_XDIGIT 11 +#define ONIGENC_CTYPE_WORD 12 +#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */ +#define ONIGENC_CTYPE_ASCII 14 +#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII + + +#define onig_enc_len(enc,p,end) ONIGENC_MBC_ENC_LEN(enc,p) #define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) #define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) #define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1) #define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) #define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) -#define ONIGENC_IS_CODE_SB_WORD(enc,code) \ - (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) #define ONIGENC_IS_MBC_WORD(enc,s,end) \ ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) -#ifdef ONIG_RUBY_M17N - -#include /* for isblank(), isgraph() */ - -#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \ - onigenc_mbc_to_normalize(enc,flag,pp,end,buf) -#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \ - onigenc_is_mbc_ambiguous(enc,flag,pp,end) - -#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE -#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ - onigenc_is_allowed_reverse_match(enc, s, end) -#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ - onigenc_get_left_adjust_char_head(enc, start, s) -#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0 -#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0 -#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \ - ONIG_NO_SUPPORT_CONFIG -#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p)) -#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc) -#define ONIGENC_MBC_MAXLEN_DIST(enc) \ - (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \ - : ONIG_INFINITE_DISTANCE) -#define ONIGENC_MBC_MINLEN(enc) 1 -#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e)) -#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code)) -#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf) - -#if 0 /* !! not supported !! */ -#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) -#define ONIGENC_STEP_BACK(enc,start,s,n) -#endif - -#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \ - onigenc_is_code_ctype(enc,code,ctype) - -#ifdef isblank -# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code) -#else -# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t') -#endif -#ifdef isgraph -# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code) -#else -# define ONIGENC_IS_CODE_GRAPH(enc,code) \ - (isprint((int )code) && !isspace((int )code)) -#endif - -#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code) -#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code) -#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code) -#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code) -#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code) -#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code) -#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code) -#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code) -#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code) -#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code) -#define ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code) - -ONIG_EXTERN -int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype)); -ONIG_EXTERN -int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, OnigUChar *buf)); -ONIG_EXTERN -int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* buf)); -ONIG_EXTERN -int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)); -ONIG_EXTERN -int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); - -#else /* ONIG_RUBY_M17N */ - #define ONIGENC_NAME(enc) ((enc)->name) -#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \ - (enc)->mbc_to_normalize(flag,(const OnigUChar** )pp,end,buf) -#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \ - (enc)->is_mbc_ambiguous(flag,(const OnigUChar** )pp,end) -#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag) +#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \ + (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf) #define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ (enc)->is_allowed_reverse_match(s,end) #define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ (enc)->left_adjust_char_head(start, s) -#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \ - (enc)->get_all_pair_ambig_codes(ambig_flag,acs) -#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \ - (enc)->get_all_comp_ambig_codes(ambig_flag,acs) +#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \ + (enc)->apply_all_case_fold(case_fold_flag,f,arg) +#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \ + (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs) #define ONIGENC_STEP_BACK(enc,start,s,n) \ onigenc_step_back((enc),(start),(s),(n)) @@ -378,6 +288,8 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, c #define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end)) #define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code) #define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf) +#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \ + (enc)->property_name_to_ctype(enc,p,end) #define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype) @@ -410,14 +322,12 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, c #define ONIGENC_IS_CODE_WORD(enc,code) \ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) -#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \ - (enc)->get_ctype_code_range(ctype,sbr,mbr) +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \ + (enc)->get_ctype_code_range(ctype,sbout,ranges) ONIG_EXTERN OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n)); -#endif /* is not ONIG_RUBY_M17N */ - /* encoding API */ ONIG_EXTERN @@ -482,10 +392,11 @@ typedef unsigned int OnigOptionType; /* syntax */ typedef struct { - unsigned int op; - unsigned int op2; - unsigned int behavior; - OnigOptionType options; /* default option */ + unsigned int op; + unsigned int op2; + unsigned int behavior; + OnigOptionType options; /* default option */ + OnigMetaCharTableType meta_char_table; } OnigSyntaxType; ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS; @@ -566,7 +477,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ #define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ #define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ -#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) /* \p{IsXDigit} */ +/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */ #define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ #define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ @@ -666,6 +577,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_NEVER_ENDING_RECURSION -221 #define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 #define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 +#define ONIGERR_INVALID_CODE_POINT_VALUE -400 #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 #define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 @@ -755,10 +667,10 @@ typedef struct re_pattern_buffer { int repeat_range_alloc; OnigRepeatRange* repeat_range; - OnigEncoding enc; + OnigEncoding enc; OnigOptionType options; OnigSyntaxType* syntax; - OnigAmbigType ambig_flag; + OnigCaseFoldType case_fold_flag; void* name_table; /* optimization info (string search, char-map and anchors) */ @@ -793,7 +705,7 @@ typedef struct { OnigEncoding target_enc; OnigSyntaxType* syntax; OnigOptionType option; - OnigAmbigType ambig_flag; + OnigCaseFoldType case_fold_flag; } OnigCompileInfo; /* Oniguruma Native API */ @@ -808,10 +720,15 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN +int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax)); +int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); ONIG_EXTERN void onig_free P_((OnigRegex)); ONIG_EXTERN +void onig_free_body P_((OnigRegex)); +ONIG_EXTERN int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); @@ -856,7 +773,7 @@ OnigEncoding onig_get_encoding P_((OnigRegex reg)); ONIG_EXTERN OnigOptionType onig_get_options P_((OnigRegex reg)); ONIG_EXTERN -OnigAmbigType onig_get_ambig_flag P_((OnigRegex reg)); +OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg)); ONIG_EXTERN OnigSyntaxType* onig_get_syntax P_((OnigRegex reg)); ONIG_EXTERN @@ -880,13 +797,13 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior) ONIG_EXTERN void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)); ONIG_EXTERN -int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code)); +int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code)); ONIG_EXTERN void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from)); ONIG_EXTERN -OnigAmbigType onig_get_default_ambig_flag P_((void)); +OnigCaseFoldType onig_get_default_case_fold_flag P_((void)); ONIG_EXTERN -int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag)); +int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag)); ONIG_EXTERN unsigned int onig_get_match_stack_limit_size P_((void)); ONIG_EXTERN diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c index 6a0976dee2..f9d99563b3 100644 --- a/ext/mbstring/oniguruma/regcomp.c +++ b/ext/mbstring/oniguruma/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,26 +29,28 @@ #include "regparse.h" -OnigAmbigType OnigDefaultAmbigFlag = - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE); +OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; -extern OnigAmbigType -onig_get_default_ambig_flag(void) +extern OnigCaseFoldType +onig_get_default_case_fold_flag(void) { - return OnigDefaultAmbigFlag; + return OnigDefaultCaseFoldFlag; } extern int -onig_set_default_ambig_flag(OnigAmbigType ambig_flag) +onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) { - OnigDefaultAmbigFlag = ambig_flag; + OnigDefaultCaseFoldFlag = case_fold_flag; return 0; } +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; +#endif + static UChar* -k_strdup(UChar* s, UChar* end) +str_dup(UChar* s, UChar* end) { int len = end - s; @@ -62,15 +64,29 @@ k_strdup(UChar* s, UChar* end) else return NULL; } -/* - Caution: node should not be a string node. - (s and end member address break) -*/ static void swap_node(Node* a, Node* b) { Node c; c = *a; *a = *b; *b = c; + + if (NTYPE(a) == NT_STR) { + StrNode* sn = NSTR(a); + if (sn->capa == 0) { + int len = sn->end - sn->s; + sn->s = sn->buf; + sn->end = sn->s + len; + } + } + + if (NTYPE(b) == NT_STR) { + StrNode* sn = NSTR(b); + if (sn->capa == 0) { + int len = sn->end - sn->s; + sn->s = sn->buf; + sn->end = sn->s + len; + } + } } static OnigDistance @@ -99,7 +115,7 @@ static int bitset_is_empty(BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { + for (i = 0; i < (int )BITSET_SIZE; i++) { if (bs[i] != 0) return 0; } return 1; @@ -122,8 +138,14 @@ bitset_on_num(BitSetRef bs) extern int onig_bbuf_init(BBuf* buf, int size) { - buf->p = (UChar* )xmalloc(size); - if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + if (size <= 0) { + size = 0; + buf->p = NULL; + } + else { + buf->p = (UChar* )xmalloc(size); + if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + } buf->alloc = size; buf->used = 0; @@ -139,7 +161,7 @@ unset_addr_list_init(UnsetAddrList* uslist, int size) UnsetAddr* p; p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); uslist->num = 0; uslist->alloc = size; uslist->us = p; @@ -162,7 +184,7 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) if (uslist->num >= uslist->alloc) { size = uslist->alloc * 2; p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); uslist->alloc = size; uslist->us = p; } @@ -394,8 +416,8 @@ compile_tree_n_times(Node* node, int n, regex_t* reg) } static int -add_compile_string_length(UChar* s, int mb_len, int str_len, - regex_t* reg, int ignore_case) +add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len, + regex_t* reg ARG_UNUSED, int ignore_case) { int len; int op = select_str_opcode(mb_len, str_len, ignore_case); @@ -440,20 +462,20 @@ compile_length_string_node(Node* node, regex_t* reg) UChar *p, *prev; StrNode* sn; - sn = &(NSTRING(node)); + sn = NSTR(node); if (sn->end <= sn->s) return 0; ambig = NSTRING_IS_AMBIG(node); p = prev = sn->s; - prev_len = enc_len(enc, p); + prev_len = enclen(enc, p); p += prev_len; slen = 1; rlen = 0; for (; p < sn->end; ) { - len = enc_len(enc, p); + len = enclen(enc, p); if (len == prev_len) { slen++; } @@ -488,7 +510,7 @@ compile_string_node(Node* node, regex_t* reg) UChar *p, *prev, *end; StrNode* sn; - sn = &(NSTRING(node)); + sn = NSTR(node); if (sn->end <= sn->s) return 0; @@ -496,12 +518,12 @@ compile_string_node(Node* node, regex_t* reg) ambig = NSTRING_IS_AMBIG(node); p = prev = sn->s; - prev_len = enc_len(enc, p); + prev_len = enclen(enc, p); p += prev_len; slen = 1; for (; p < end; ) { - len = enc_len(enc, p); + len = enclen(enc, p); if (len == prev_len) { slen++; } @@ -535,8 +557,6 @@ add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) add_length(reg, mbuf->used); return add_bytes(reg, mbuf->p, mbuf->used); #else - static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; - int r, pad_size; UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; @@ -558,7 +578,7 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg) { int len; - if (IS_CCLASS_SHARE(cc)) { + if (IS_NCCLASS_SHARE(cc)) { len = SIZE_OPCODE + SIZE_POINTER; return len; } @@ -588,14 +608,14 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) { int r; - if (IS_CCLASS_SHARE(cc)) { + if (IS_NCCLASS_SHARE(cc)) { add_opcode(reg, OP_CCLASS_NODE); r = add_pointer(reg, cc); return r; } if (IS_NULL(cc->mbuf)) { - if (IS_CCLASS_NOT(cc)) + if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_NOT); else add_opcode(reg, OP_CCLASS); @@ -604,7 +624,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) } else { if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - if (IS_CCLASS_NOT(cc)) + if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_MB_NOT); else add_opcode(reg, OP_CCLASS_MB); @@ -612,7 +632,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) r = add_multi_byte_cclass(cc->mbuf, reg); } else { - if (IS_CCLASS_NOT(cc)) + if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_MIX_NOT); else add_opcode(reg, OP_CCLASS_MIX); @@ -635,7 +655,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) if (reg->repeat_range_alloc == 0) { p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); reg->repeat_range = p; reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; } @@ -644,7 +664,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; p = (OnigRepeatRange* )xrealloc(reg->repeat_range, sizeof(OnigRepeatRange) * n); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); reg->repeat_range = p; reg->repeat_range_alloc = n; } @@ -658,7 +678,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) } static int -compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info, +compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, regex_t* reg) { int r; @@ -694,10 +714,10 @@ compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info, } static int -is_anychar_star_quantifier(QuantifierNode* qn) +is_anychar_star_quantifier(QtfrNode* qn) { if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && - NTYPE(qn->target) == N_ANYCHAR) + NTYPE(qn->target) == NT_CANY) return 1; else return 0; @@ -709,7 +729,7 @@ is_anychar_star_quantifier(QuantifierNode* qn) #ifdef USE_COMBINATION_EXPLOSION_CHECK static int -compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) { int len, mod_tlen, cklen; int ckn; @@ -724,7 +744,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); /* anychar repeat */ - if (NTYPE(qn->target) == N_ANYCHAR) { + if (NTYPE(qn->target) == NT_CANY) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; @@ -789,7 +809,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) } static int -compile_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_quantifier_node(QtfrNode* qn, regex_t* reg) { int r, mod_tlen; int ckn; @@ -815,7 +835,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) if (r) return r; } - return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) { @@ -943,7 +963,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) #else /* USE_COMBINATION_EXPLOSION_CHECK */ static int -compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) { int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); @@ -953,7 +973,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) if (tlen < 0) return tlen; /* anychar repeat */ - if (NTYPE(qn->target) == N_ANYCHAR) { + if (NTYPE(qn->target) == NT_CANY) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact)) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; @@ -1008,7 +1028,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) } static int -compile_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_quantifier_node(QtfrNode* qn, regex_t* reg) { int i, r, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); @@ -1026,7 +1046,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); if (r) return r; - return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) @@ -1067,7 +1087,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, mod_tlen + SIZE_OP_JUMP); if (r) return r; - add_bytes(reg, NSTRING(qn->head_exact).s, 1); + add_bytes(reg, NSTR(qn->head_exact)->s, 1); r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, @@ -1077,7 +1097,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, mod_tlen + SIZE_OP_JUMP); if (r) return r; - add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, @@ -1136,7 +1156,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) #endif /* USE_COMBINATION_EXPLOSION_CHECK */ static int -compile_length_option_node(EffectNode* node, regex_t* reg) +compile_length_option_node(EncloseNode* node, regex_t* reg) { int tlen; OnigOptionType prev = reg->options; @@ -1156,7 +1176,7 @@ compile_length_option_node(EffectNode* node, regex_t* reg) } static int -compile_option_node(EffectNode* node, regex_t* reg) +compile_option_node(EncloseNode* node, regex_t* reg) { int r; OnigOptionType prev = reg->options; @@ -1182,12 +1202,12 @@ compile_option_node(EffectNode* node, regex_t* reg) } static int -compile_length_effect_node(EffectNode* node, regex_t* reg) +compile_length_enclose_node(EncloseNode* node, regex_t* reg) { int len; int tlen; - if (node->type == EFFECT_OPTION) + if (node->type == ENCLOSE_OPTION) return compile_length_option_node(node, reg); if (node->target) { @@ -1198,16 +1218,16 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) tlen = 0; switch (node->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CALLED(node)) { + if (IS_ENCLOSE_CALLED(node)) { len = SIZE_OP_MEMORY_START_PUSH + tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); } else @@ -1223,9 +1243,9 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) } break; - case EFFECT_STOP_BACKTRACK: - if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QuantifierNode* qn = &NQUANTIFIER(node->target); + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; @@ -1248,17 +1268,17 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) static int get_char_length_tree(Node* node, regex_t* reg, int* len); static int -compile_effect_node(EffectNode* node, regex_t* reg) +compile_enclose_node(EncloseNode* node, regex_t* reg) { int r, len; - if (node->type == EFFECT_OPTION) + if (node->type == ENCLOSE_OPTION) return compile_option_node(node, reg); switch (node->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CALLED(node)) { + if (IS_ENCLOSE_CALLED(node)) { r = add_opcode(reg, OP_CALL); if (r) return r; node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; @@ -1268,10 +1288,10 @@ compile_effect_node(EffectNode* node, regex_t* reg) len = compile_length_tree(node->target, reg); len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); r = add_opcode_rel_addr(reg, OP_JUMP, len); @@ -1288,12 +1308,12 @@ compile_effect_node(EffectNode* node, regex_t* reg) r = compile_tree(node->target, reg); if (r) return r; #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CALLED(node)) { + if (IS_ENCLOSE_CALLED(node)) { if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); else - r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END)); if (r) return r; @@ -1313,9 +1333,9 @@ compile_effect_node(EffectNode* node, regex_t* reg) } break; - case EFFECT_STOP_BACKTRACK: - if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QuantifierNode* qn = &NQUANTIFIER(node->target); + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; @@ -1472,50 +1492,50 @@ compile_length_tree(Node* node, regex_t* reg) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: len = 0; do { - r = compile_length_tree(NCONS(node).left, reg); + r = compile_length_tree(NCAR(node), reg); if (r < 0) return r; len += r; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); r = len; break; - case N_ALT: + case NT_ALT: { int n; n = r = 0; do { - r += compile_length_tree(NCONS(node).left, reg); + r += compile_length_tree(NCAR(node), reg); n++; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); } break; - case N_STRING: + case NT_STR: if (NSTRING_IS_RAW(node)) - r = compile_length_string_raw_node(&(NSTRING(node)), reg); + r = compile_length_string_raw_node(NSTR(node), reg); else r = compile_length_string_node(node, reg); break; - case N_CCLASS: - r = compile_length_cclass_node(&(NCCLASS(node)), reg); + case NT_CCLASS: + r = compile_length_cclass_node(NCCLASS(node), reg); break; - case N_CTYPE: - case N_ANYCHAR: + case NT_CTYPE: + case NT_CANY: r = SIZE_OPCODE; break; - case N_BACKREF: + case NT_BREF: { - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); @@ -1533,21 +1553,21 @@ compile_length_tree(Node* node, regex_t* reg) break; #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: r = SIZE_OP_CALL; break; #endif - case N_QUANTIFIER: - r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg); + case NT_QTFR: + r = compile_length_quantifier_node(NQTFR(node), reg); break; - case N_EFFECT: - r = compile_length_effect_node(&NEFFECT(node), reg); + case NT_ENCLOSE: + r = compile_length_enclose_node(NENCLOSE(node), reg); break; - case N_ANCHOR: - r = compile_length_anchor_node(&(NANCHOR(node)), reg); + case NT_ANCHOR: + r = compile_length_anchor_node(NANCHOR(node), reg); break; default: @@ -1565,59 +1585,61 @@ compile_tree(Node* node, regex_t* reg) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: do { - r = compile_tree(NCONS(node).left, reg); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = compile_tree(NCAR(node), reg); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: { Node* x = node; len = 0; do { - len += compile_length_tree(NCONS(x).left, reg); - if (NCONS(x).right != NULL) { + len += compile_length_tree(NCAR(x), reg); + if (NCDR(x) != NULL) { len += SIZE_OP_PUSH + SIZE_OP_JUMP; } - } while (IS_NOT_NULL(x = NCONS(x).right)); + } while (IS_NOT_NULL(x = NCDR(x))); pos = reg->used + len; /* goal position */ do { - len = compile_length_tree(NCONS(node).left, reg); - if (IS_NOT_NULL(NCONS(node).right)) { + len = compile_length_tree(NCAR(node), reg); + if (IS_NOT_NULL(NCDR(node))) { r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); if (r) break; } - r = compile_tree(NCONS(node).left, reg); + r = compile_tree(NCAR(node), reg); if (r) break; - if (IS_NOT_NULL(NCONS(node).right)) { + if (IS_NOT_NULL(NCDR(node))) { len = pos - (reg->used + SIZE_OP_JUMP); r = add_opcode_rel_addr(reg, OP_JUMP, len); if (r) break; } - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); } break; - case N_STRING: + case NT_STR: if (NSTRING_IS_RAW(node)) - r = compile_string_raw_node(&(NSTRING(node)), reg); + r = compile_string_raw_node(NSTR(node), reg); else r = compile_string_node(node, reg); break; - case N_CCLASS: - r = compile_cclass_node(&(NCCLASS(node)), reg); + case NT_CCLASS: + r = compile_cclass_node(NCCLASS(node), reg); break; - case N_CTYPE: + case NT_CTYPE: { int op; - switch (NCTYPE(node).type) { - case CTYPE_WORD: op = OP_WORD; break; - case CTYPE_NOT_WORD: op = OP_NOT_WORD; break; + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; + else op = OP_WORD; + break; default: return ONIGERR_TYPE_BUG; break; @@ -1626,20 +1648,20 @@ compile_tree(Node* node, regex_t* reg) } break; - case N_ANYCHAR: + case NT_CANY: if (IS_MULTILINE(reg->options)) r = add_opcode(reg, OP_ANYCHAR_ML); else r = add_opcode(reg, OP_ANYCHAR); break; - case N_BACKREF: + case NT_BREF: { - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { - r = add_opcode(reg, OP_BACKREF_AT_LEVEL); + r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); if (r) return r; r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); if (r) return r; @@ -1681,7 +1703,7 @@ compile_tree(Node* node, regex_t* reg) } if (r) return r; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL add_bacref_mems: #endif r = add_length(reg, br->back_num); @@ -1696,21 +1718,21 @@ compile_tree(Node* node, regex_t* reg) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - r = compile_call(&(NCALL(node)), reg); + case NT_CALL: + r = compile_call(NCALL(node), reg); break; #endif - case N_QUANTIFIER: - r = compile_quantifier_node(&(NQUANTIFIER(node)), reg); + case NT_QTFR: + r = compile_quantifier_node(NQTFR(node), reg); break; - case N_EFFECT: - r = compile_effect_node(&NEFFECT(node), reg); + case NT_ENCLOSE: + r = compile_enclose_node(NENCLOSE(node), reg); break; - case N_ANCHOR: - r = compile_anchor_node(&(NANCHOR(node)), reg); + case NT_ANCHOR: + r = compile_anchor_node(NANCHOR(node), reg); break; default: @@ -1732,29 +1754,29 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) Node* node = *plink; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = noname_disable_map(&(NCONS(node).left), map, counter); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = noname_disable_map(&(NCAR(node)), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: + case NT_QTFR: { - Node** ptarget = &(NQUANTIFIER(node).target); + Node** ptarget = &(NQTFR(node)->target); Node* old = *ptarget; r = noname_disable_map(ptarget, map, counter); - if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) { + if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { onig_reduce_nested_quantifier(node, *ptarget); } } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); - if (en->type == EFFECT_MEMORY) { - if (IS_EFFECT_NAMED_GROUP(en)) { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { + if (IS_ENCLOSE_NAMED_GROUP(en)) { (*counter)++; map[en->regnum].new_val = *counter; en->regnum = *counter; @@ -1784,7 +1806,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map) { int i, pos, n, old_num; int *backs; - BackrefNode* bn = &(NBACKREF(node)); + BRefNode* bn = NBREF(node); if (! IS_BACKREF_NAME_REF(bn)) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; @@ -1813,20 +1835,20 @@ renumber_by_map(Node* node, GroupNumRemap* map) int r = 0; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = renumber_by_map(NCONS(node).left, map); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = renumber_by_map(NCAR(node), map); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = renumber_by_map(NQUANTIFIER(node).target, map); + case NT_QTFR: + r = renumber_by_map(NQTFR(node)->target, map); break; - case N_EFFECT: - r = renumber_by_map(NEFFECT(node).target, map); + case NT_ENCLOSE: + r = renumber_by_map(NENCLOSE(node)->target, map); break; - case N_BACKREF: + case NT_BREF: r = renumber_node_backref(node, map); break; @@ -1843,21 +1865,21 @@ numbered_ref_check(Node* node) int r = 0; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = numbered_ref_check(NCONS(node).left); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = numbered_ref_check(NCAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = numbered_ref_check(NQUANTIFIER(node).target); + case NT_QTFR: + r = numbered_ref_check(NQTFR(node)->target); break; - case N_EFFECT: - r = numbered_ref_check(NEFFECT(node).target); + case NT_ENCLOSE: + r = numbered_ref_check(NENCLOSE(node)->target); break; - case N_BACKREF: - if (! IS_BACKREF_NAME_REF(&(NBACKREF(node)))) + case NT_BREF: + if (! IS_BACKREF_NAME_REF(NBREF(node))) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; break; @@ -1876,7 +1898,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) GroupNumRemap* map; map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); - CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(map); for (i = 1; i <= env->num_mem; i++) { map[i].new_val = 0; } @@ -1914,12 +1936,12 @@ static int unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) { int i, offset; - EffectNode* en; + EncloseNode* en; AbsAddrType addr; for (i = 0; i < uslist->num; i++) { - en = &(NEFFECT(uslist->us[i].target)); - if (! IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; + en = NENCLOSE(uslist->us[i].target); + if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; addr = en->call_addr; offset = uslist->us[i].offset; @@ -1929,53 +1951,53 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) } #endif -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT static int quantifiers_memory_node_info(Node* node) { int r = 0; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: { int v; do { - v = quantifiers_memory_node_info(NCONS(node).left); + v = quantifiers_memory_node_info(NCAR(node)); if (v > r) r = v; - } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&NCALL(node))) { + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ } else - r = quantifiers_memory_node_info(NCALL(node).target); + r = quantifiers_memory_node_info(NCALL(node)->target); break; #endif - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->upper != 0) { r = quantifiers_memory_node_info(qn->target); } } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: return NQ_TARGET_IS_EMPTY_MEM; break; - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = quantifiers_memory_node_info(en->target); break; default: @@ -1984,19 +2006,19 @@ quantifiers_memory_node_info(Node* node) } break; - case N_BACKREF: - case N_STRING: - case N_CTYPE: - case N_CCLASS: - case N_ANYCHAR: - case N_ANCHOR: + case NT_BREF: + case NT_STR: + case NT_CTYPE: + case NT_CCLASS: + case NT_CANY: + case NT_ANCHOR: default: break; } return r; } -#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */ +#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ static int get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) @@ -2006,12 +2028,12 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) *min = 0; switch (NTYPE(node)) { - case N_BACKREF: + case NT_BREF: { int i; int* backs; Node** nodes = SCANENV_MEM_NODES(env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) break; backs = BACKREFS_P(br); @@ -2028,62 +2050,57 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&NCALL(node))) { - EffectNode* en = &(NEFFECT(NCALL(node).target)); - if (IS_EFFECT_MIN_FIXED(en)) + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { + EncloseNode* en = NENCLOSE(NCALL(node)->target); + if (IS_ENCLOSE_MIN_FIXED(en)) *min = en->min_len; } else - r = get_min_match_length(NCALL(node).target, min, env); + r = get_min_match_length(NCALL(node)->target, min, env); break; #endif - case N_LIST: + case NT_LIST: do { - r = get_min_match_length(NCONS(node).left, &tmin, env); + r = get_min_match_length(NCAR(node), &tmin, env); if (r == 0) *min += tmin; - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: { Node *x, *y; y = node; do { - x = NCONS(y).left; + x = NCAR(y); r = get_min_match_length(x, &tmin, env); if (r != 0) break; if (y == node) *min = tmin; else if (*min > tmin) *min = tmin; - } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right)); + } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); } break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); *min = sn->end - sn->s; } break; - case N_CTYPE: - switch (NCTYPE(node).type) { - case CTYPE_WORD: *min = 1; break; - case CTYPE_NOT_WORD: *min = 1; break; - default: - break; - } + case NT_CTYPE: + *min = 1; break; - case N_CCLASS: - case N_ANYCHAR: + case NT_CCLASS: + case NT_CANY: *min = 1; break; - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->lower > 0) { r = get_min_match_length(qn->target, min, env); @@ -2093,32 +2110,32 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_MIN_FIXED(en)) + if (IS_ENCLOSE_MIN_FIXED(en)) *min = en->min_len; else { r = get_min_match_length(en->target, min, env); if (r == 0) { en->min_len = *min; - SET_EFFECT_STATUS(node, NST_MIN_FIXED); + SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); } } break; #endif - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = get_min_match_length(en->target, min, env); break; } } break; - case N_ANCHOR: + case NT_ANCHOR: default: break; } @@ -2134,51 +2151,43 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) *max = 0; switch (NTYPE(node)) { - case N_LIST: + case NT_LIST: do { - r = get_max_match_length(NCONS(node).left, &tmax, env); + r = get_max_match_length(NCAR(node), &tmax, env); if (r == 0) *max = distance_add(*max, tmax); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: do { - r = get_max_match_length(NCONS(node).left, &tmax, env); + r = get_max_match_length(NCAR(node), &tmax, env); if (r == 0 && *max < tmax) *max = tmax; - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); *max = sn->end - sn->s; } break; - case N_CTYPE: - switch (NCTYPE(node).type) { - case CTYPE_WORD: - case CTYPE_NOT_WORD: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - break; - - default: - break; - } + case NT_CTYPE: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; - case N_CCLASS: - case N_ANYCHAR: + case NT_CCLASS: + case NT_CANY: *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; - case N_BACKREF: + case NT_BREF: { int i; int* backs; Node** nodes = SCANENV_MEM_NODES(env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) { *max = ONIG_INFINITE_DISTANCE; break; @@ -2194,17 +2203,17 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (! IS_CALL_RECURSION(&(NCALL(node)))) - r = get_max_match_length(NCALL(node).target, max, env); + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_max_match_length(NCALL(node)->target, max, env); else *max = ONIG_INFINITE_DISTANCE; break; #endif - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->upper != 0) { r = get_max_match_length(qn->target, max, env); @@ -2218,32 +2227,32 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_MAX_FIXED(en)) + if (IS_ENCLOSE_MAX_FIXED(en)) *max = en->max_len; else { r = get_max_match_length(en->target, max, env); if (r == 0) { en->max_len = *max; - SET_EFFECT_STATUS(node, NST_MAX_FIXED); + SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); } } break; #endif - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = get_max_match_length(en->target, max, env); break; } } break; - case N_ANCHOR: + case NT_ANCHOR: default: break; } @@ -2264,22 +2273,22 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) level++; *len = 0; switch (NTYPE(node)) { - case N_LIST: + case NT_LIST: do { - r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); if (r == 0) *len = distance_add(*len, tlen); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: { int tlen2; int varlen = 0; - r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); - while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) { - r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level); + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { + r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); if (r == 0) { if (tlen != tlen2) varlen = 1; @@ -2298,20 +2307,20 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); UChar *s = sn->s; while (s < sn->end) { - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); (*len)++; } } break; - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->lower == qn->upper) { r = get_char_length_tree1(qn->target, reg, &tlen, level); if (r == 0) @@ -2323,47 +2332,42 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (! IS_CALL_RECURSION(&(NCALL(node)))) - r = get_char_length_tree1(NCALL(node).target, reg, len, level); + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_char_length_tree1(NCALL(node)->target, reg, len, level); else r = GET_CHAR_LEN_VARLEN; break; #endif - case N_CTYPE: - switch (NCTYPE(node).type) { - case CTYPE_WORD: - case CTYPE_NOT_WORD: - *len = 1; - break; - } + case NT_CTYPE: + *len = 1; break; - case N_CCLASS: - case N_ANYCHAR: + case NT_CCLASS: + case NT_CANY: *len = 1; break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CLEN_FIXED(en)) + if (IS_ENCLOSE_CLEN_FIXED(en)) *len = en->char_len; else { r = get_char_length_tree1(en->target, reg, len, level); if (r == 0) { en->char_len = *len; - SET_EFFECT_STATUS(node, NST_CLEN_FIXED); + SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); } } break; #endif - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = get_char_length_tree1(en->target, reg, len, level); break; default: @@ -2372,7 +2376,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case N_ANCHOR: + case NT_ANCHOR: break; default: @@ -2401,29 +2405,18 @@ is_not_included(Node* x, Node* y, regex_t* reg) retry: ytype = NTYPE(y); switch (NTYPE(x)) { - case N_CTYPE: + case NT_CTYPE: { switch (ytype) { - case N_CTYPE: - switch (NCTYPE(x).type) { - case CTYPE_WORD: - if (NCTYPE(y).type == CTYPE_NOT_WORD) - return 1; - else - return 0; - break; - case CTYPE_NOT_WORD: - if (NCTYPE(y).type == CTYPE_WORD) - return 1; - else - return 0; - break; - default: - break; - } + case NT_CTYPE: + if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && + NCTYPE(y)->not != NCTYPE(x)->not) + return 1; + else + return 0; break; - case N_CCLASS: + case NT_CCLASS: swap: { Node* tmp; @@ -2432,7 +2425,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_STRING: + case NT_STR: goto swap; break; @@ -2442,37 +2435,39 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_CCLASS: + case NT_CCLASS: { - CClassNode* xc = &(NCCLASS(x)); + CClassNode* xc = NCCLASS(x); switch (ytype) { - case N_CTYPE: - switch (NCTYPE(y).type) { - case CTYPE_WORD: - if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(xc->bs, i)) { - if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0; + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(y)->not == 0) { + if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(xc->bs, i)) { + if (IS_CODE_SB_WORD(reg->enc, i)) return 0; + } } + return 1; } - return 1; + return 0; } - return 0; - break; - case CTYPE_NOT_WORD: - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) { - if (!IS_CCLASS_NOT(xc)) { - if (BITSET_AT(xc->bs, i)) - return 0; - } - else { - if (! BITSET_AT(xc->bs, i)) - return 0; + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! IS_CODE_SB_WORD(reg->enc, i)) { + if (!IS_NCCLASS_NOT(xc)) { + if (BITSET_AT(xc->bs, i)) + return 0; + } + else { + if (! BITSET_AT(xc->bs, i)) + return 0; + } } } + return 1; } - return 1; break; default: @@ -2480,29 +2475,29 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_CCLASS: + case NT_CCLASS: { int v; - CClassNode* yc = &(NCCLASS(y)); + CClassNode* yc = NCCLASS(y); for (i = 0; i < SINGLE_BYTE_SIZE; i++) { v = BITSET_AT(xc->bs, i); - if ((v != 0 && !IS_CCLASS_NOT(xc)) || - (v == 0 && IS_CCLASS_NOT(xc))) { + if ((v != 0 && !IS_NCCLASS_NOT(xc)) || + (v == 0 && IS_NCCLASS_NOT(xc))) { v = BITSET_AT(yc->bs, i); - if ((v != 0 && !IS_CCLASS_NOT(yc)) || - (v == 0 && IS_CCLASS_NOT(yc))) + if ((v != 0 && !IS_NCCLASS_NOT(yc)) || + (v == 0 && IS_NCCLASS_NOT(yc))) return 0; } } - if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) || - (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc))) + if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) return 1; return 0; } break; - case N_STRING: + case NT_STR: goto swap; break; @@ -2512,30 +2507,30 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_STRING: + case NT_STR: { - StrNode* xs = &(NSTRING(x)); + StrNode* xs = NSTR(x); if (NSTRING_LEN(x) == 0) break; c = *(xs->s); switch (ytype) { - case N_CTYPE: - switch (NCTYPE(y).type) { - case CTYPE_WORD: - return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1); - break; - case CTYPE_NOT_WORD: - return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0); + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) + return NCTYPE(y)->not; + else + return !(NCTYPE(y)->not); break; default: break; } break; - case N_CCLASS: + case NT_CCLASS: { - CClassNode* cc = &(NCCLASS(y)); + CClassNode* cc = NCCLASS(y); code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); @@ -2543,10 +2538,10 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_STRING: + case NT_STR: { UChar *q; - StrNode* ys = &(NSTRING(y)); + StrNode* ys = NSTR(y); len = NSTRING_LEN(x); if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { @@ -2580,40 +2575,34 @@ get_head_value_node(Node* node, int exact, regex_t* reg) Node* n = NULL_NODE; switch (NTYPE(node)) { - case N_BACKREF: - case N_ALT: - case N_ANYCHAR: + case NT_BREF: + case NT_ALT: + case NT_CANY: #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: #endif break; - case N_CTYPE: - case N_CCLASS: + case NT_CTYPE: + case NT_CCLASS: if (exact == 0) { n = node; } break; - case N_LIST: - n = get_head_value_node(NCONS(node).left, exact, reg); + case NT_LIST: + n = get_head_value_node(NCAR(node), exact, reg); break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); if (sn->end <= sn->s) break; if (exact != 0 && !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { -#if 0 - UChar* tmp = sn->s; - if (! ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, - &tmp, sn->end)) - n = node; -#endif } else { n = node; @@ -2621,9 +2610,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->lower > 0) { if (IS_NOT_NULL(qn->head_exact)) n = qn->head_exact; @@ -2633,31 +2622,31 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_OPTION: + case ENCLOSE_OPTION: { OnigOptionType options = reg->options; - reg->options = NEFFECT(node).option; - n = get_head_value_node(NEFFECT(node).target, exact, reg); + reg->options = NENCLOSE(node)->option; + n = get_head_value_node(NENCLOSE(node)->target, exact, reg); reg->options = options; } break; - case EFFECT_MEMORY: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_MEMORY: + case ENCLOSE_STOP_BACKTRACK: n = get_head_value_node(en->target, exact, reg); break; } } break; - case N_ANCHOR: - if (NANCHOR(node).type == ANCHOR_PREC_READ) - n = get_head_value_node(NANCHOR(node).target, exact, reg); + case NT_ANCHOR: + if (NANCHOR(node)->type == ANCHOR_PREC_READ) + n = get_head_value_node(NANCHOR(node)->target, exact, reg); break; default: @@ -2668,45 +2657,46 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } static int -check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask) +check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) { int type, r = 0; type = NTYPE(node); - if ((type & type_mask) == 0) + if ((NTYPE2BIT(type) & type_mask) == 0) return 1; switch (type) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = check_type_tree(NCAR(node), type_mask, enclose_mask, + anchor_mask); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask, + case NT_QTFR: + r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, anchor_mask); break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); - if ((en->type & effect_mask) == 0) + EncloseNode* en = NENCLOSE(node); + if ((en->type & enclose_mask) == 0) return 1; - r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask); + r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); } break; - case N_ANCHOR: - type = NANCHOR(node).type; + case NT_ANCHOR: + type = NANCHOR(node)->type; if ((type & anchor_mask) == 0) return 1; - if (NANCHOR(node).target) - r = check_type_tree(NANCHOR(node).target, - type_mask, effect_mask, anchor_mask); + if (NANCHOR(node)->target) + r = check_type_tree(NANCHOR(node)->target, + type_mask, enclose_mask, anchor_mask); break; default: @@ -2728,7 +2718,7 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { Node *x; OnigDistance min; @@ -2736,40 +2726,40 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) x = node; do { - ret = subexp_inf_recursive_check(NCONS(x).left, env, head); + ret = subexp_inf_recursive_check(NCAR(x), env, head); if (ret < 0 || ret == RECURSION_INFINITE) return ret; r |= ret; if (head) { - ret = get_min_match_length(NCONS(x).left, &min, env); + ret = get_min_match_length(NCAR(x), &min, env); if (ret != 0) return ret; if (min != 0) head = 0; } - } while (IS_NOT_NULL(x = NCONS(x).right)); + } while (IS_NOT_NULL(x = NCDR(x))); } break; - case N_ALT: + case NT_ALT: { int ret; r = RECURSION_EXIST; do { - ret = subexp_inf_recursive_check(NCONS(node).left, env, head); + ret = subexp_inf_recursive_check(NCAR(node), env, head); if (ret < 0 || ret == RECURSION_INFINITE) return ret; r &= ret; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); } break; - case N_QUANTIFIER: - r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head); + case NT_QTFR: + r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); if (r == RECURSION_EXIST) { - if (NQUANTIFIER(node).lower == 0) r = 0; + if (NQTFR(node)->lower == 0) r = 0; } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2781,19 +2771,19 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) } break; - case N_CALL: - r = subexp_inf_recursive_check(NCALL(node).target, env, head); + case NT_CALL: + r = subexp_inf_recursive_check(NCALL(node)->target, env, head); break; - case N_EFFECT: - if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) return 0; - else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); else { - SET_EFFECT_STATUS(node, NST_MARK2); - r = subexp_inf_recursive_check(NEFFECT(node).target, env, head); - CLEAR_EFFECT_STATUS(node, NST_MARK2); + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); } break; @@ -2812,20 +2802,20 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = subexp_inf_recursive_check_trav(NCONS(node).left, env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = subexp_inf_recursive_check_trav(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env); + case NT_QTFR: + r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2837,15 +2827,15 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); - if (IS_EFFECT_RECURSION(en)) { - SET_EFFECT_STATUS(node, NST_MARK1); + if (IS_ENCLOSE_RECURSION(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); r = subexp_inf_recursive_check(en->target, env, 1); if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; - CLEAR_EFFECT_STATUS(node, NST_MARK1); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); } r = subexp_inf_recursive_check_trav(en->target, env); } @@ -2862,25 +2852,23 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) static int subexp_recursive_check(Node* node) { - int type; int r = 0; - type = NTYPE(node); - switch (type) { - case N_LIST: - case N_ALT: + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: do { - r |= subexp_recursive_check(NCONS(node).left); - } while (IS_NOT_NULL(node = NCONS(node).right)); + r |= subexp_recursive_check(NCAR(node)); + } while (IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = subexp_recursive_check(NQUANTIFIER(node).target); + case NT_QTFR: + r = subexp_recursive_check(NQTFR(node)->target); break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2892,20 +2880,20 @@ subexp_recursive_check(Node* node) } break; - case N_CALL: - r = subexp_recursive_check(NCALL(node).target); + case NT_CALL: + r = subexp_recursive_check(NCALL(node)->target); if (r != 0) SET_CALL_RECURSION(node); break; - case N_EFFECT: - if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) return 0; - else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) return 1; /* recursion */ else { - SET_EFFECT_STATUS(node, NST_MARK2); - r = subexp_recursive_check(NEFFECT(node).target); - CLEAR_EFFECT_STATUS(node, NST_MARK2); + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_recursive_check(NENCLOSE(node)->target); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); } break; @@ -2927,29 +2915,29 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: { int ret; do { - ret = subexp_recursive_check_trav(NCONS(node).left, env); + ret = subexp_recursive_check_trav(NCAR(node), env); if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; else if (ret < 0) return ret; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); } break; - case N_QUANTIFIER: - r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env); - if (NQUANTIFIER(node).upper == 0) { + case NT_QTFR: + r = subexp_recursive_check_trav(NQTFR(node)->target, env); + if (NQTFR(node)->upper == 0) { if (r == FOUND_CALLED_NODE) - NQUANTIFIER(node).is_refered = 1; + NQTFR(node)->is_refered = 1; } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2961,20 +2949,20 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); - if (! IS_EFFECT_RECURSION(en)) { - if (IS_EFFECT_CALLED(en)) { - SET_EFFECT_STATUS(node, NST_MARK1); + if (! IS_ENCLOSE_RECURSION(en)) { + if (IS_ENCLOSE_CALLED(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); r = subexp_recursive_check(en->target); - if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION); - CLEAR_EFFECT_STATUS(node, NST_MARK1); + if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); } } r = subexp_recursive_check_trav(en->target, env); - if (IS_EFFECT_CALLED(en)) + if (IS_ENCLOSE_CALLED(en)) r |= FOUND_CALLED_NODE; } break; @@ -2994,46 +2982,33 @@ setup_subexp_call(Node* node, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: do { - r = setup_subexp_call(NCONS(node).left, env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: do { - r = setup_subexp_call(NCONS(node).left, env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = setup_subexp_call(NQUANTIFIER(node).target, env); + case NT_QTFR: + r = setup_subexp_call(NQTFR(node)->target, env); break; - case N_EFFECT: - r = setup_subexp_call(NEFFECT(node).target, env); + case NT_ENCLOSE: + r = setup_subexp_call(NENCLOSE(node)->target, env); break; - case N_CALL: + case NT_CALL: { - int n, num, *refs; - UChar *p; - CallNode* cn = &(NCALL(node)); + CallNode* cn = NCALL(node); Node** nodes = SCANENV_MEM_NODES(env); -#ifdef USE_NAMED_GROUP - n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs); -#else - n = -1; -#endif - if (n <= 0) { - /* name not found, check group number. (?*ddd) */ - p = cn->name; - num = onig_scan_unsigned_number(&p, cn->name_end, env->enc); - if (num <= 0 || p != cn->name_end) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } + if (cn->group_num != 0) { + int gnum = cn->group_num; + #ifdef USE_NAMED_GROUP if (env->num_named > 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && @@ -3041,38 +3016,53 @@ setup_subexp_call(Node* node, ScanEnv* env) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; } #endif - if (num > env->num_mem) { + if (gnum > env->num_mem) { onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); return ONIGERR_UNDEFINED_GROUP_REFERENCE; } - cn->ref_num = num; - goto set_call_attr; - } - else if (n > 1) { - onig_scan_env_set_error_string(env, - ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); - return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; - } - else { - cn->ref_num = refs[0]; + +#ifdef USE_NAMED_GROUP set_call_attr: - cn->target = nodes[cn->ref_num]; +#endif + cn->target = nodes[cn->group_num]; if (IS_NULL(cn->target)) { onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); return ONIGERR_UNDEFINED_NAME_REFERENCE; } - SET_EFFECT_STATUS(cn->target, NST_CALLED); - BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num); + SET_ENCLOSE_STATUS(cn->target, NST_CALLED); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); cn->unset_addr_list = env->unset_addr_list; } +#ifdef USE_NAMED_GROUP + else { + int *refs; + + int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, + &refs); + if (n <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + else if (n > 1) { + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->group_num = refs[0]; + goto set_call_attr; + } + } +#endif } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: @@ -3100,30 +3090,29 @@ setup_subexp_call(Node* node, ScanEnv* env) static int divide_look_behind_alternatives(Node* node) { - Node tmp_node; Node *head, *np, *insert_node; - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); int anc_type = an->type; head = an->target; - np = NCONS(head).left; - tmp_node = *node; *node = *head; *head = tmp_node; - NCONS(node).left = head; - NANCHOR(head).target = np; + np = NCAR(head); + swap_node(node, head); + NCAR(node) = head; + NANCHOR(head)->target = np; np = node; - while ((np = NCONS(np).right) != NULL_NODE) { + while ((np = NCDR(np)) != NULL_NODE) { insert_node = onig_node_new_anchor(anc_type); - CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY); - NANCHOR(insert_node).target = NCONS(np).left; - NCONS(np).left = insert_node; + CHECK_NULL_RETURN_MEMERR(insert_node); + NANCHOR(insert_node)->target = NCAR(np); + NCAR(np) = insert_node; } if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { np = node; do { - np->type = N_LIST; /* alt -> list */ - } while ((np = NCONS(np).right) != NULL_NODE); + SET_NTYPE(np, NT_LIST); /* alt -> list */ + } while ((np = NCDR(np)) != NULL_NODE); } return 0; } @@ -3132,7 +3121,7 @@ static int setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) { int r, len; - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); r = get_char_length_tree(an->target, reg, &len); if (r == 0) @@ -3156,11 +3145,15 @@ next_setup(Node* node, Node* next_node, regex_t* reg) retry: type = NTYPE(node); - if (type == N_QUANTIFIER) { - QuantifierNode* qn = &(NQUANTIFIER(node)); + if (type == NT_QTFR) { + QtfrNode* qn = NQTFR(node); if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { -#ifdef USE_QUANTIFIER_PEEK_NEXT - qn->next_head_exact = get_head_value_node(next_node, 1, reg); +#ifdef USE_QTFR_PEEK_NEXT + Node* n = get_head_value_node(next_node, 1, reg); + /* '\0': for UTF-16BE etc... */ + if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { + qn->next_head_exact = n; + } #endif /* automatic posseivation a*b ==> (?>a*)b */ if (qn->lower <= 1) { @@ -3171,20 +3164,20 @@ next_setup(Node* node, Node* next_node, regex_t* reg) if (IS_NOT_NULL(x)) { y = get_head_value_node(next_node, 0, reg); if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { - Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK); - CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); - SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); + CHECK_NULL_RETURN_MEMERR(en); + SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); swap_node(node, en); - NEFFECT(node).target = en; + NENCLOSE(node)->target = en; } } } } } } - else if (type == N_EFFECT) { - EffectNode* en = &(NEFFECT(node)); - if (en->type == EFFECT_MEMORY) { + else if (type == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { node = en->target; goto retry; } @@ -3194,100 +3187,318 @@ next_setup(Node* node, Node* next_node, regex_t* reg) static int -divide_ambig_string_node_sub(regex_t* reg, int prev_ambig, - UChar* prev_start, UChar* prev, - UChar* end, Node*** tailp, Node** root) +update_string_node_case_fold(regex_t* reg, Node *node) { - UChar *tmp, *wp; - Node* snode; + UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *sbuf, *ebuf, *sp; + int r, i, len, sbuf_size; + StrNode* sn = NSTR(node); + + end = sn->end; + sbuf_size = (end - sn->s) * 2; + sbuf = (UChar* )xmalloc(sbuf_size); + CHECK_NULL_RETURN_MEMERR(sbuf); + ebuf = sbuf + sbuf_size; - if (prev_ambig != 0) { - tmp = prev_start; - wp = prev_start; - while (tmp < prev) { - wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, - &tmp, end, wp); + sp = sbuf; + p = sn->s; + while (p < end) { + len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); + q = buf; + for (i = 0; i < len; i++) { + if (sp >= ebuf) { + sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); + CHECK_NULL_RETURN_MEMERR(sbuf); + sp = sbuf + sbuf_size; + sbuf_size *= 2; + ebuf = sbuf + sbuf_size; + } + + *sp++ = buf[i]; } - snode = onig_node_new_str(prev_start, wp); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - NSTRING_SET_AMBIG(snode); - if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode); } - else { - snode = onig_node_new_str(prev_start, prev); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + + r = onig_node_str_set(node, sbuf, sp); + if (r != 0) { + xfree(sbuf); + return r; + } + + xfree(sbuf); + return 0; +} + +static int +expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, + regex_t* reg) +{ + int r; + Node *node; + + node = onig_node_new_str(s, end); + if (IS_NULL(node)) return ONIGERR_MEMORY; + + r = update_string_node_case_fold(reg, node); + if (r != 0) { + onig_node_free(node); + return r; + } + + NSTRING_SET_AMBIG(node); + NSTRING_SET_DONT_GET_OPT_INFO(node); + *rnode = node; + return 0; +} + +static int +expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], + UChar *p, int slen, UChar *end, + regex_t* reg, Node **rnode) +{ + int r, i, j, len, varlen; + Node *anode, *var_anode, *snode, *xnode, *an; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + + *rnode = var_anode = NULL_NODE; + + varlen = 0; + for (i = 0; i < item_num; i++) { + if (items[i].byte_len != slen) { + varlen = 1; + break; + } } - if (*tailp == (Node** )0) { - *root = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY); - *tailp = &(NCONS(*root).right); + if (varlen != 0) { + *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(var_anode)) return ONIGERR_MEMORY; + + xnode = onig_node_new_list(NULL, NULL); + if (IS_NULL(xnode)) goto mem_err; + NCAR(var_anode) = xnode; + + anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) goto mem_err; + NCAR(xnode) = anode; } else { - **tailp = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY); - *tailp = &(NCONS(**tailp).right); + *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) return ONIGERR_MEMORY; } - return 0; + snode = onig_node_new_str(p, p + slen); + if (IS_NULL(snode)) goto mem_err; + + NCAR(anode) = snode; + + for (i = 0; i < item_num; i++) { + snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + + for (j = 0; j < items[i].code_len; j++) { + len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); + if (len < 0) { + r = len; + goto mem_err2; + } + + r = onig_node_str_cat(snode, buf, buf + len); + if (r != 0) goto mem_err2; + } + + an = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(an)) { + goto mem_err2; + } + + if (items[i].byte_len != slen) { + Node *rem; + UChar *q = p + items[i].byte_len; + + if (q < end) { + r = expand_case_fold_make_rem_string(&rem, q, end, reg); + if (r != 0) { + onig_node_free(an); + goto mem_err2; + } + + xnode = onig_node_list_add(NULL_NODE, snode); + if (IS_NULL(xnode)) { + onig_node_free(an); + onig_node_free(rem); + goto mem_err2; + } + if (IS_NULL(onig_node_list_add(xnode, rem))) { + onig_node_free(an); + onig_node_free(xnode); + onig_node_free(rem); + goto mem_err; + } + + NCAR(an) = xnode; + } + else { + NCAR(an) = snode; + } + + NCDR(var_anode) = an; + var_anode = an; + } + else { + NCAR(an) = snode; + NCDR(anode) = an; + anode = an; + } + } + + return varlen; + + mem_err2: + onig_node_free(snode); + + mem_err: + onig_node_free(*rnode); + + return ONIGERR_MEMORY; } static int -divide_ambig_string_node(Node* node, regex_t* reg) +expand_case_fold_string(Node* node, regex_t* reg) { - StrNode* sn = &NSTRING(node); - int ambig, prev_ambig; - UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp; - Node *root = NULL_NODE; - Node **tailp = (Node** )0; - int r; +#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 - start = prev_start = p = sn->s; - end = sn->end; - if (p >= end) return 0; + int r, n, len, alt_num; + UChar *start, *end, *p; + Node *top_root, *root, *snode, *prev_node; + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + StrNode* sn = NSTR(node); - prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end); + if (NSTRING_IS_AMBIG(node)) return 0; + start = sn->s; + end = sn->end; + if (start >= end) return 0; + + r = 0; + top_root = root = prev_node = snode = NULL_NODE; + alt_num = 1; + p = start; while (p < end) { - prev = p; - if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, - reg->ambig_flag, &p, end))) { + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, + p, end, items); + if (n < 0) { + r = n; + goto err; + } + + len = enclen(reg->enc, p); + + if (n == 0) { + if (IS_NULL(snode)) { + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } - r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev, - end, &tailp, &root); - if (r != 0) return r; + prev_node = snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, snode))) { + onig_node_free(snode); + goto mem_err; + } + } + } - prev_ambig = ambig; - prev_start = prev; + r = onig_node_str_cat(snode, p, p + len); + if (r != 0) goto err; } - } + else { + alt_num *= (n + 1); + if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; + + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } - if (prev_start == start) { - if (prev_ambig != 0) { - NSTRING_SET_AMBIG(node); - tmp = start; - wp = start; - while (tmp < end) { - wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, - &tmp, end, wp); + r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); + if (r < 0) goto mem_err; + if (r == 1) { + if (IS_NULL(root)) { + top_root = prev_node; + } + else { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } + + root = NCAR(prev_node); + } + else { /* r == 0 */ + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } } - if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node); - sn->end = wp; + + snode = NULL_NODE; } + + p += len; } - else { - r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end, - end, &tailp, &root); - if (r != 0) return r; - swap_node(node, root); - onig_node_str_clear(root); /* should be after swap! */ - onig_node_free(root); /* free original string node */ + if (p < end) { + Node *srem; + + r = expand_case_fold_make_rem_string(&srem, p, end, reg); + if (r != 0) goto mem_err; + + if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(srem); + onig_node_free(prev_node); + goto mem_err; + } + } + + if (IS_NULL(root)) { + prev_node = srem; + } + else { + if (IS_NULL(onig_node_list_add(root, srem))) { + onig_node_free(srem); + goto mem_err; + } + } } + /* ending */ + top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); + swap_node(node, top_root); + onig_node_free(top_root); return 0; + + mem_err: + r = ONIGERR_MEMORY; + + err: + onig_node_free(top_root); + return r; } + #ifdef USE_COMBINATION_EXPLOSION_CHECK #define CEC_THRES_NUM_BIG_REPEAT 512 @@ -3305,31 +3516,31 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { Node* prev = NULL_NODE; do { - r = setup_comb_exp_check(NCONS(node).left, r, env); - prev = NCONS(node).left; - } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_comb_exp_check(NCAR(node), r, env); + prev = NCAR(node); + } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; - case N_ALT: + case NT_ALT: { int ret; do { - ret = setup_comb_exp_check(NCONS(node).left, state, env); + ret = setup_comb_exp_check(NCAR(node), state, env); r |= ret; - } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; - case N_QUANTIFIER: + case NT_QTFR: { int child_state = state; int add_state = 0; - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); Node* target = qn->target; int var_num; @@ -3340,11 +3551,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ if (env->backrefed_mem == 0) { - if (NTYPE(qn->target) == N_EFFECT) { - EffectNode* en = &(NEFFECT(qn->target)); - if (en->type == EFFECT_MEMORY) { - if (NTYPE(en->target) == N_QUANTIFIER) { - QuantifierNode* q = &(NQUANTIFIER(en->target)); + if (NTYPE(qn->target) == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(qn->target); + if (en->type == ENCLOSE_MEMORY) { + if (NTYPE(en->target) == NT_QTFR) { + QtfrNode* q = NQTFR(en->target); if (IS_REPEAT_INFINITE(q->upper) && q->greedy == qn->greedy) { qn->upper = (qn->lower == 0 ? 1 : qn->lower); @@ -3390,12 +3601,12 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: { if (env->curr_max_regnum < en->regnum) env->curr_max_regnum = en->regnum; @@ -3412,11 +3623,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&(NCALL(node)))) + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) env->has_recursion = 1; else - r = setup_comb_exp_check(NCALL(node).target, state, env); + r = setup_comb_exp_check(NCALL(node)->target, state, env); break; #endif @@ -3449,68 +3660,68 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { Node* prev = NULL_NODE; do { - r = setup_tree(NCONS(node).left, reg, state, env); + r = setup_tree(NCAR(node), reg, state, env); if (IS_NOT_NULL(prev) && r == 0) { - r = next_setup(prev, NCONS(node).left, reg); + r = next_setup(prev, NCAR(node), reg); } - prev = NCONS(node).left; - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + prev = NCAR(node); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); } break; - case N_ALT: + case NT_ALT: do { - r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_CCLASS: + case NT_CCLASS: break; - case N_STRING: + case NT_STR: if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { - r = divide_ambig_string_node(node, reg); + r = expand_case_fold_string(node, reg); } break; - case N_CTYPE: - case N_ANYCHAR: + case NT_CTYPE: + case NT_CANY: break; #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: break; #endif - case N_BACKREF: + case NT_BREF: { int i; int* p; Node** nodes = SCANENV_MEM_NODES(env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); p = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); } #endif - SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); } } break; - case N_QUANTIFIER: + case NT_QTFR: { OnigDistance d; - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); Node* target = qn->target; if ((state & IN_REPEAT) != 0) { @@ -3522,7 +3733,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r) break; if (d == 0) { qn->target_empty_info = NQ_TARGET_IS_EMPTY; -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT r = quantifiers_memory_node_info(target); if (r < 0) break; if (r > 0) { @@ -3535,7 +3746,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* ()* ==> ()?, ()+ ==> () */ qn->upper = 1; if (qn->lower > 1) qn->lower = 1; - if (NTYPE(target) == N_STRING) { + if (NTYPE(target) == NT_STR) { qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ } } @@ -3551,29 +3762,29 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* expand string */ #define EXPAND_STRING_MAX_LENGTH 100 - if (NTYPE(target) == N_STRING) { + if (NTYPE(target) == NT_STR) { if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { int len = NSTRING_LEN(target); - StrNode* sn = &(NSTRING(target)); + StrNode* sn = NSTR(target); if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { int i, n = qn->lower; - onig_node_conv_to_str_node(node, NSTRING(target).flag); + onig_node_conv_to_str_node(node, NSTR(target)->flag); for (i = 0; i < n; i++) { r = onig_node_str_cat(node, sn->s, sn->end); if (r) break; } onig_node_free(target); - break; /* break case N_QUANTIFIER: */ + break; /* break case NT_QTFR: */ } } } #ifdef USE_OP_PUSH_OR_JUMP_EXACT if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == N_QUANTIFIER) { - QuantifierNode* tqn = &(NQUANTIFIER(target)); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); if (IS_NOT_NULL(tqn->head_exact)) { qn->head_exact = tqn->head_exact; tqn->head_exact = NULL; @@ -3587,39 +3798,39 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_OPTION: + case ENCLOSE_OPTION: { OnigOptionType options = reg->options; - reg->options = NEFFECT(node).option; - r = setup_tree(NEFFECT(node).target, reg, state, env); + reg->options = NENCLOSE(node)->option; + r = setup_tree(NENCLOSE(node)->target, reg, state, env); reg->options = options; } break; - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); - /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */ + /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ } r = setup_tree(en->target, reg, state, env); break; - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_STOP_BACKTRACK: { Node* target = en->target; r = setup_tree(target, reg, state, env); - if (NTYPE(target) == N_QUANTIFIER) { - QuantifierNode* tqn = &(NQUANTIFIER(target)); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && tqn->greedy != 0) { /* (?>a*), a*+ etc... */ int qtype = NTYPE(tqn->target); if (IS_NODE_TYPE_SIMPLE(qtype)) - SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); } } } @@ -3628,9 +3839,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: @@ -3642,11 +3853,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* allowed node types in look-behind */ #define ALLOWED_TYPE_IN_LB \ - ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \ - N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL ) + ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ + BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) -#define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY ) -#define ALLOWED_EFFECT_IN_LB_NOT 0 +#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) +#define ALLOWED_ENCLOSE_IN_LB_NOT 0 #define ALLOWED_ANCHOR_IN_LB \ ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) @@ -3656,7 +3867,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case ANCHOR_LOOK_BEHIND: { r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB); + ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); @@ -3668,7 +3879,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case ANCHOR_LOOK_BEHIND_NOT: { r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); @@ -3689,7 +3900,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* set skip map for Boyer-Moor search */ static int -set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, UChar skip[], int** int_skip) { int i, len; @@ -3722,11 +3933,11 @@ typedef struct { } MinMaxLen; typedef struct { - MinMaxLen mmd; - OnigEncoding enc; - OnigOptionType options; - OnigAmbigType ambig_flag; - ScanEnv* scan_env; + MinMaxLen mmd; + OnigEncoding enc; + OnigOptionType options; + OnigCaseFoldType case_fold_flag; + ScanEnv* scan_env; } OptEnv; typedef struct { @@ -3778,7 +3989,7 @@ map_position_value(OnigEncoding enc, int i) 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 }; - if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) { + if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) return 20; else @@ -3810,7 +4021,7 @@ distance_value(MinMaxLen* mm) if (mm->max == ONIG_INFINITE_DISTANCE) return 0; d = mm->max - mm->min; - if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) + if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) /* return dist_vals[d] * 16 / (mm->min + 12); */ return (int )dist_vals[d]; else @@ -4003,7 +4214,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) p = add->s; end = p + add->len; for (i = to->len; p < end; ) { - len = enc_len(enc, p); + len = enclen(enc, p); if (i + len > OPT_EXACT_MAXLEN) break; for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; @@ -4018,14 +4229,14 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) } static void -concat_opt_exact_info_str(OptExactInfo* to, - UChar* s, UChar* end, int raw, OnigEncoding enc) +concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, + int raw ARG_UNUSED, OnigEncoding enc) { int i, j, len; UChar *p; for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { - len = enc_len(enc, p); + len = enclen(enc, p); if (i + len > OPT_EXACT_MAXLEN) break; for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; @@ -4051,7 +4262,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) for (i = 0; i < to->len && i < add->len; ) { if (to->s[i] != add->s[i]) break; - len = enc_len(env->enc, to->s + i); + len = enclen(env->enc, to->s + i); for (j = 1; j < len; j++) { if (to->s[i+j] != add->s[i+j]) break; @@ -4146,29 +4357,23 @@ add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) static int add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, - OnigEncoding enc, OnigAmbigType ambig_flag) + OnigEncoding enc, OnigCaseFoldType case_fold_flag) { - int i, n, len; - UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN]; - OnigCodePoint code; - const OnigPairAmbigCodes* pccs; - OnigAmbigType amb; + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int i, n; add_char_opt_map_info(map, p[0], enc); - code = ONIGENC_MBC_TO_CODE(enc, p, end); - for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { - if ((amb & ambig_flag) == 0) continue; + case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); + if (n < 0) return n; - n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs); - for (i = 0; i < n; i++) { - if (pccs[i].from == code) { - len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf); - if (len < 0) return len; - add_char_opt_map_info(map, buf[0], enc); - } - } + for (i = 0; i < n; i++) { + ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); + add_char_opt_map_info(map, buf[0], enc); } + return 0; } @@ -4341,7 +4546,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { OptEnv nenv; NodeOptInfo nopt; @@ -4349,33 +4554,33 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) copy_opt_env(&nenv, env); do { - r = optimize_node_left(NCONS(nd).left, &nopt, &nenv); + r = optimize_node_left(NCAR(nd), &nopt, &nenv); if (r == 0) { add_mml(&nenv.mmd, &nopt.len); concat_left_node_opt_info(env->enc, opt, &nopt); } - } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right)); + } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); } break; - case N_ALT: + case NT_ALT: { NodeOptInfo nopt; Node* nd = node; do { - r = optimize_node_left(NCONS(nd).left, &nopt, env); + r = optimize_node_left(NCAR(nd), &nopt, env); if (r == 0) { if (nd == node) copy_node_opt_info(opt, &nopt); else alt_merge_node_opt_info(opt, &nopt, env); } - } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right)); + } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); } break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); int slen = sn->end - sn->s; int is_raw = NSTRING_IS_RAW(node); @@ -4388,25 +4593,26 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) set_mml(&opt->len, slen, slen); } else { - int n, max; + int max; - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - is_raw, env->enc); - opt->exb.ignore_case = 1; + if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { + int n = onigenc_strlen(env->enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; + } + else { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + is_raw, env->enc); + opt->exb.ignore_case = 1; + + if (slen > 0) { + r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, + env->enc, env->case_fold_flag); + if (r != 0) break; + } - if (slen > 0) { - r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, - env->enc, env->ambig_flag); - if (r != 0) break; + max = slen; } - if (NSTRING_IS_AMBIG_REDUCE(node)) { - n = onigenc_strlen(env->enc, sn->s, sn->end); - max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; - } - else { - max = slen; - } set_mml(&opt->len, slen, max); } @@ -4415,14 +4621,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_CCLASS: + case NT_CCLASS: { int i, z; - CClassNode* cc = &(NCCLASS(node)); + CClassNode* cc = NCCLASS(node); /* no need to check ignore case. (setted in setup_tree()) */ - if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) { + if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); @@ -4431,7 +4637,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { z = BITSET_AT(cc->bs, i); - if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) { + if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { add_char_opt_map_info(&opt->map, (UChar )i, env->enc); } } @@ -4440,7 +4646,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_CTYPE: + case NT_CTYPE: { int i, min, max; @@ -4449,21 +4655,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (max == 1) { min = 1; - switch (NCTYPE(node).type) { - case CTYPE_NOT_WORD: - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - break; - - case CTYPE_WORD: - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } break; } } @@ -4474,7 +4681,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_ANYCHAR: + case NT_CANY: { OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); @@ -4482,22 +4689,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_ANCHOR: - switch (NANCHOR(node).type) { + case NT_ANCHOR: + switch (NANCHOR(node)->type) { case ANCHOR_BEGIN_BUF: case ANCHOR_BEGIN_POSITION: case ANCHOR_BEGIN_LINE: case ANCHOR_END_BUF: case ANCHOR_SEMI_END_BUF: case ANCHOR_END_LINE: - add_opt_anc_info(&opt->anc, NANCHOR(node).type); + add_opt_anc_info(&opt->anc, NANCHOR(node)->type); break; case ANCHOR_PREC_READ: { NodeOptInfo nopt; - r = optimize_node_left(NANCHOR(node).target, &nopt, env); + r = optimize_node_left(NANCHOR(node)->target, &nopt, env); if (r == 0) { if (nopt.exb.len > 0) copy_opt_exact_info(&opt->expr, &nopt.exb); @@ -4519,13 +4726,13 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_BACKREF: + case NT_BREF: { int i; int* backs; OnigDistance min, max, tmin, tmax; Node** nodes = SCANENV_MEM_NODES(env->scan_env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) { set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); @@ -4549,31 +4756,31 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&(NCALL(node)))) + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); else { OnigOptionType save = env->options; - env->options = NEFFECT(NCALL(node).target).option; - r = optimize_node_left(NCALL(node).target, opt, env); + env->options = NENCLOSE(NCALL(node)->target)->option; + r = optimize_node_left(NCALL(node)->target, opt, env); env->options = save; } break; #endif - case N_QUANTIFIER: + case NT_QTFR: { int i; OnigDistance min, max; NodeOptInfo nopt; - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); r = optimize_node_left(qn->target, &nopt, env); if (r) break; if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && - NTYPE(qn->target) == N_ANYCHAR && qn->greedy) { + NTYPE(qn->target) == NT_CANY && qn->greedy) { if (IS_MULTILINE(env->options)) add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); else @@ -4585,7 +4792,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) copy_node_opt_info(opt, &nopt); if (nopt.exb.len > 0) { if (nopt.exb.reach_end) { - for (i = 2; i < qn->lower && + for (i = 2; i <= qn->lower && ! is_full_opt_exact_info(&opt->exb); i++) { concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); } @@ -4614,12 +4821,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_OPTION: + case ENCLOSE_OPTION: { OnigOptionType save = env->options; @@ -4629,7 +4836,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL en->opt_count++; if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { @@ -4637,8 +4844,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) min = 0; max = ONIG_INFINITE_DISTANCE; - if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len; - if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len; + if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; + if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; set_mml(&opt->len, min, max); } else @@ -4653,7 +4860,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_STOP_BACKTRACK: r = optimize_node_left(en->target, opt, env); break; } @@ -4681,7 +4888,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->ignore_case) { reg->exact = (UChar* )xmalloc(e->len); - CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(reg->exact); xmemcpy(reg->exact, e->s, e->len); reg->exact_end = reg->exact + e->len; reg->optimize = ONIG_OPTIMIZE_EXACT_IC; @@ -4689,8 +4896,8 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) else { int allow_reverse; - reg->exact = k_strdup(e->s, e->s + e->len); - CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + reg->exact = str_dup(e->s, e->s + e->len); + CHECK_NULL_RETURN_MEMERR(reg->exact); reg->exact_end = reg->exact + e->len; allow_reverse = @@ -4755,9 +4962,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) NodeOptInfo opt; OptEnv env; - env.enc = reg->enc; - env.options = reg->options; - env.ambig_flag = reg->ambig_flag; + env.enc = reg->enc; + env.options = reg->options; + env.case_fold_flag = reg->case_fold_flag; env.scan_env = scan_env; clear_mml(&env.mmd); @@ -4839,7 +5046,7 @@ static void print_enc_string(FILE* fp, OnigEncoding enc, fputc((int )code, fp); } - p += enc_len(enc, p); + p += enclen(enc, p); } } else { @@ -4971,19 +5178,21 @@ print_optimize_info(FILE* f, regex_t* reg) #endif /* ONIG_DEBUG */ -static void +extern void onig_free_body(regex_t* reg) { - if (IS_NOT_NULL(reg->p)) xfree(reg->p); - if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); - if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); - if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); - if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + if (IS_NOT_NULL(reg)) { + if (IS_NOT_NULL(reg->p)) xfree(reg->p); + if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); + if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); + if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); + if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); + if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); #ifdef USE_NAMED_GROUP - onig_names_free(reg); + onig_names_free(reg); #endif + } } extern void @@ -5043,84 +5252,6 @@ onig_chain_reduce(regex_t* reg) } } -#if 0 -extern int -onig_clone(regex_t** to, regex_t* from) -{ - int r, size; - regex_t* reg; - -#ifdef USE_MULTI_THREAD_SYSTEM - if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(from); - if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(from); - ONIG_STATE_INC(from); - } - } - else { - int n = 0; - while (ONIG_STATE(from) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - ONIG_STATE_INC(from); - } -#endif /* USE_MULTI_THREAD_SYSTEM */ - - r = onig_alloc_init(®, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT, - from->enc, ONIG_SYNTAX_DEFAULT); - if (r != 0) { - ONIG_STATE_DEC(from); - return r; - } - - xmemcpy(reg, from, sizeof(onig_t)); - reg->chain = (regex_t* )NULL; - reg->state = ONIG_STATE_NORMAL; - - if (from->p) { - reg->p = (UChar* )xmalloc(reg->alloc); - if (IS_NULL(reg->p)) goto mem_error; - xmemcpy(reg->p, from->p, reg->alloc); - } - - if (from->exact) { - reg->exact = (UChar* )xmalloc(from->exact_end - from->exact); - if (IS_NULL(reg->exact)) goto mem_error; - reg->exact_end = reg->exact + (from->exact_end - from->exact); - xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact); - } - - if (from->int_map) { - size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; - reg->int_map = (int* )xmalloc(size); - if (IS_NULL(reg->int_map)) goto mem_error; - xmemcpy(reg->int_map, from->int_map, size); - } - - if (from->int_map_backward) { - size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; - reg->int_map_backward = (int* )xmalloc(size); - if (IS_NULL(reg->int_map_backward)) goto mem_error; - xmemcpy(reg->int_map_backward, from->int_map_backward, size); - } - -#ifdef USE_NAMED_GROUP - reg->name_table = names_clone(from); /* names_clone is not implemented */ -#endif - - ONIG_STATE_DEC(from); - *to = reg; - return 0; - - mem_error: - ONIG_STATE_DEC(from); - return ONIGERR_MEMORY; -} -#endif - #ifdef ONIG_DEBUG static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); #endif @@ -5141,6 +5272,8 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, UnsetAddrList uslist; #endif + if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + reg->state = ONIG_STATE_COMPILING; #ifdef ONIG_DEBUG @@ -5182,10 +5315,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } #endif -#ifdef ONIG_DEBUG_PARSE_TREE - print_tree(stderr, root); -#endif - #ifdef USE_SUBEXP_CALL if (scan_env.num_call > 0) { r = unset_addr_list_init(&uslist, scan_env.num_call); @@ -5207,6 +5336,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = setup_tree(root, reg, 0, &scan_env); if (r != 0) goto err_unset; +#ifdef ONIG_DEBUG_PARSE_TREE + print_tree(stderr, root); +#endif + reg->capture_history = scan_env.capture_history; reg->bt_mem_start = scan_env.bt_mem_start; reg->bt_mem_start |= reg->capture_history; @@ -5308,7 +5441,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } } - if (IS_NOT_NULL(root)) onig_node_free(root); + onig_node_free(root); if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) xfree(scan_env.mem_nodes_dynamic); return r; @@ -5338,12 +5471,16 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, static int onig_inited = 0; extern int -onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, - OnigEncoding enc, OnigSyntaxType* syntax) +onig_reg_init(regex_t* reg, OnigOptionType option, + OnigCaseFoldType case_fold_flag, + OnigEncoding enc, OnigSyntaxType* syntax) { if (! onig_inited) onig_init(); + if (IS_NULL(reg)) + return ONIGERR_INVALID_ARGUMENT; + if (ONIGENC_IS_UNDEF(enc)) return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; @@ -5352,9 +5489,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; } - *reg = (regex_t* )xmalloc(sizeof(regex_t)); - if (IS_NULL(*reg)) return ONIGERR_MEMORY; - (*reg)->state = ONIG_STATE_MODIFY; + (reg)->state = ONIG_STATE_MODIFY; if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { option |= syntax->options; @@ -5363,24 +5498,36 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, else option |= syntax->options; - (*reg)->enc = enc; - (*reg)->options = option; - (*reg)->syntax = syntax; - (*reg)->optimize = 0; - (*reg)->exact = (UChar* )NULL; - (*reg)->int_map = (int* )NULL; - (*reg)->int_map_backward = (int* )NULL; - (*reg)->chain = (regex_t* )NULL; + (reg)->enc = enc; + (reg)->options = option; + (reg)->syntax = syntax; + (reg)->optimize = 0; + (reg)->exact = (UChar* )NULL; + (reg)->int_map = (int* )NULL; + (reg)->int_map_backward = (int* )NULL; + (reg)->chain = (regex_t* )NULL; + + (reg)->p = (UChar* )NULL; + (reg)->alloc = 0; + (reg)->used = 0; + (reg)->name_table = (void* )NULL; + + (reg)->case_fold_flag = case_fold_flag; + return 0; +} - (*reg)->p = (UChar* )NULL; - (*reg)->alloc = 0; - (*reg)->used = 0; - (*reg)->name_table = (void* )NULL; +extern int +onig_new_without_alloc(regex_t* reg, const UChar* pattern, + const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, + OnigSyntaxType* syntax, OnigErrorInfo* einfo) +{ + int r; - (*reg)->ambig_flag = ambig_flag; - (*reg)->ambig_flag &= ONIGENC_SUPPORT_AMBIG_FLAG(enc); + r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r) return r; - return 0; + r = onig_compile(reg, pattern, pattern_end, einfo); + return r; } extern int @@ -5390,33 +5537,35 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, { int r; - if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) return ONIGERR_MEMORY; - r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT, - enc, syntax); - if (r) return r; + r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r) goto err; r = onig_compile(*reg, pattern, pattern_end, einfo); if (r) { + err: onig_free(*reg); *reg = NULL; } return r; } + extern int onig_init(void) { if (onig_inited != 0) return 0; - onig_inited = 1; - THREAD_SYSTEM_INIT; THREAD_ATOMIC_START; + onig_inited = 1; + onigenc_init(); - onigenc_set_default_caseconv_table((UChar* )0); + /* onigenc_set_default_caseconv_table((UChar* )0); */ #ifdef ONIG_DEBUG_STATISTICS onig_statistics_init(); @@ -5430,8 +5579,6 @@ onig_init(void) extern int onig_end(void) { - extern int onig_free_shared_cclass_table(void); - THREAD_ATOMIC_START; #ifdef ONIG_DEBUG_STATISTICS @@ -5442,7 +5589,7 @@ onig_end(void) onig_free_shared_cclass_table(); #endif -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE onig_free_node_list(); #endif @@ -5453,6 +5600,64 @@ onig_end(void) return 0; } +extern int +onig_is_in_code_range(const UChar* p, OnigCodePoint code) +{ + OnigCodePoint n, *data; + OnigCodePoint low, high, x; + + GET_CODE_POINT(n, p); + data = (OnigCodePoint* )p; + data++; + + for (low = 0, high = n; low < high; ) { + x = (low + high) >> 1; + if (code > data[x * 2 + 1]) + low = x + 1; + else + high = x; + } + + return ((low < n && code >= data[low * 2]) ? 1 : 0); +} + +extern int +onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) +{ + int found; + + if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } + } + else { + found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); + } + + if (IS_NCCLASS_NOT(cc)) + return !found; + else + return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int len; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + len = 2; + } + else { + len = ONIGENC_CODE_TO_MBCLEN(enc, code); + } + return onig_is_code_in_cc_len(len, code, cc); +} + #ifdef ONIG_DEBUG @@ -5514,7 +5719,7 @@ OnigOpInfoType OnigOpInfo[] = { { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, @@ -5706,7 +5911,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, break; case OP_EXACT1_IC: - len = enc_len(enc, bp); + len = enclen(enc, bp); p_string(f, len, bp); bp += len; break; @@ -5781,7 +5986,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, } break; - case OP_BACKREF_AT_LEVEL: + case OP_BACKREF_WITH_LEVEL: { OnigOptionType option; LengthType level; @@ -5889,27 +6094,27 @@ print_indent_tree(FILE* f, Node* node, int indent) type = NTYPE(node); switch (type) { - case N_LIST: - case N_ALT: - if (NTYPE(node) == N_LIST) + case NT_LIST: + case NT_ALT: + if (NTYPE(node) == NT_LIST) fprintf(f, "\n", (int )node); else fprintf(f, "\n", (int )node); - print_indent_tree(f, NCONS(node).left, indent + add); - while (IS_NOT_NULL(node = NCONS(node).right)) { + print_indent_tree(f, NCAR(node), indent + add); + while (IS_NOT_NULL(node = NCDR(node))) { if (NTYPE(node) != type) { fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); exit(0); } - print_indent_tree(f, NCONS(node).left, indent + add); + print_indent_tree(f, NCAR(node), indent + add); } break; - case N_STRING: + case NT_STR: fprintf(f, "", (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node); - for (p = NSTRING(node).s; p < NSTRING(node).end; p++) { + for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { if (*p >= 0x20 && *p < 0x7f) fputc(*p, f); else { @@ -5918,11 +6123,11 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_CCLASS: + case NT_CCLASS: fprintf(f, "", (int )node); - if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f); - if (NCCLASS(node).mbuf) { - BBuf* bbuf = NCCLASS(node).mbuf; + if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); + if (NCCLASS(node)->mbuf) { + BBuf* bbuf = NCCLASS(node)->mbuf; for (i = 0; i < bbuf->used; i++) { if (i > 0) fprintf(f, ","); fprintf(f, "%0x", bbuf->p[i]); @@ -5930,24 +6135,29 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_CTYPE: + case NT_CTYPE: fprintf(f, " ", (int )node); - switch (NCTYPE(node).type) { - case CTYPE_WORD: fputs("word", f); break; - case CTYPE_NOT_WORD: fputs("not word", f); break; + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) + fputs("not word", f); + else + fputs("word", f); + break; + default: fprintf(f, "ERROR: undefined ctype.\n"); exit(0); } break; - case N_ANYCHAR: + case NT_CANY: fprintf(f, "", (int )node); break; - case N_ANCHOR: + case NT_ANCHOR: fprintf(f, " ", (int )node); - switch (NANCHOR(node).type) { + switch (NANCHOR(node)->type) { case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; case ANCHOR_END_BUF: fputs("end buf", f); break; case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; @@ -5972,10 +6182,10 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_BACKREF: + case NT_BREF: { int* p; - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); p = BACKREFS_P(br); fprintf(f, "", (int )node); for (i = 0; i < br->back_num; i++) { @@ -5986,33 +6196,33 @@ print_indent_tree(FILE* f, Node* node, int indent) break; #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: { - CallNode* cn = &(NCALL(node)); + CallNode* cn = NCALL(node); fprintf(f, "", (int )node); p_string(f, cn->name_end - cn->name, cn->name); } break; #endif - case N_QUANTIFIER: + case NT_QTFR: fprintf(f, "{%d,%d}%s\n", (int )node, - NQUANTIFIER(node).lower, NQUANTIFIER(node).upper, - (NQUANTIFIER(node).greedy ? "" : "?")); - print_indent_tree(f, NQUANTIFIER(node).target, indent + add); + NQTFR(node)->lower, NQTFR(node)->upper, + (NQTFR(node)->greedy ? "" : "?")); + print_indent_tree(f, NQTFR(node)->target, indent + add); break; - case N_EFFECT: - fprintf(f, " ", (int )node); - switch (NEFFECT(node).type) { - case EFFECT_OPTION: - fprintf(f, "option:%d\n", NEFFECT(node).option); - print_indent_tree(f, NEFFECT(node).target, indent + add); + case NT_ENCLOSE: + fprintf(f, " ", (int )node); + switch (NENCLOSE(node)->type) { + case ENCLOSE_OPTION: + fprintf(f, "option:%d\n", NENCLOSE(node)->option); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); break; - case EFFECT_MEMORY: - fprintf(f, "memory:%d", NEFFECT(node).regnum); + case ENCLOSE_MEMORY: + fprintf(f, "memory:%d", NENCLOSE(node)->regnum); break; - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_STOP_BACKTRACK: fprintf(f, "stop-bt"); break; @@ -6020,7 +6230,7 @@ print_indent_tree(FILE* f, Node* node, int indent) break; } fprintf(f, "\n"); - print_indent_tree(f, NEFFECT(node).target, indent + add); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); break; default: @@ -6028,8 +6238,8 @@ print_indent_tree(FILE* f, Node* node, int indent) break; } - if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER && - type != N_EFFECT) + if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && + type != NT_ENCLOSE) fprintf(f, "\n"); fflush(f); } diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c index 958917e122..80903508b8 100644 --- a/ext/mbstring/oniguruma/regenc.c +++ b/ext/mbstring/oniguruma/regenc.c @@ -55,7 +55,7 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U { UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); if (p < s) { - p += enc_len(enc, p); + p += enclen(enc, p); } return p; } @@ -68,7 +68,7 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, if (p < s) { if (prev) *prev = (const UChar* )p; - p += enc_len(enc, p); + p += enclen(enc, p); } else { if (prev) *prev = (const UChar* )NULL; /* Sorry */ @@ -169,52 +169,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) } } -#ifndef ONIG_RUBY_M17N - -#ifndef NOT_RUBY - -#define USE_APPLICATION_TO_LOWER_CASE_TABLE - -const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 -}; -#endif - -const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0; - -#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE -static const UChar BuiltInAsciiToLowerCaseTable[] = { +const UChar OnigEncAsciiToLowerCaseTable[] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -248,7 +203,6 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = { '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', }; -#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */ #ifdef USE_UPPER_CASE_TABLE const UChar OnigEncAsciiToUpperCaseTable[256] = { @@ -288,23 +242,22 @@ const UChar OnigEncAsciiToUpperCaseTable[256] = { #endif const unsigned short OnigEncAsciiCtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, - + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -396,19 +349,10 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { #endif extern void -onigenc_set_default_caseconv_table(const UChar* table) +onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) { - if (table == (const UChar* )0) { -#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE - table = BuiltInAsciiToLowerCaseTable; -#else - return ; -#endif - } - - if (table != OnigEncAsciiToLowerCaseTable) { - OnigEncAsciiToLowerCaseTable = table; - } + /* nothing */ + /* obsoleted. */ } extern UChar* @@ -417,7 +361,7 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); } -const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { +const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { { 0x41, 0x61 }, { 0x42, 0x62 }, { 0x43, 0x63 }, @@ -443,157 +387,175 @@ const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { { 0x57, 0x77 }, { 0x58, 0x78 }, { 0x59, 0x79 }, - { 0x5a, 0x7a }, - - { 0x61, 0x41 }, - { 0x62, 0x42 }, - { 0x63, 0x43 }, - { 0x64, 0x44 }, - { 0x65, 0x45 }, - { 0x66, 0x46 }, - { 0x67, 0x47 }, - { 0x68, 0x48 }, - { 0x69, 0x49 }, - { 0x6a, 0x4a }, - { 0x6b, 0x4b }, - { 0x6c, 0x4c }, - { 0x6d, 0x4d }, - { 0x6e, 0x4e }, - { 0x6f, 0x4f }, - { 0x70, 0x50 }, - { 0x71, 0x51 }, - { 0x72, 0x52 }, - { 0x73, 0x53 }, - { 0x74, 0x54 }, - { 0x75, 0x55 }, - { 0x76, 0x56 }, - { 0x77, 0x57 }, - { 0x78, 0x58 }, - { 0x79, 0x59 }, - { 0x7a, 0x5a } + { 0x5a, 0x7a } }; extern int -onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg) { - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes)); - } - else { - return 0; + OnigCodePoint code; + int i, r; + + for (i = 0; + i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); + i++) { + code = OnigAsciiLowerMap[i].to; + r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); + if (r != 0) return r; + + code = OnigAsciiLowerMap[i].from; + r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); + if (r != 0) return r; } -} -extern int -onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag, - const OnigCompAmbigCodes** ccs) -{ return 0; } extern int -onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end ARG_UNUSED, + OnigCaseFoldCodeItem items[]) { - static const OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes)); + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; } - else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; } else return 0; } +static int +ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + static OnigCodePoint ss[] = { 0x73, 0x73 }; + + return (*f)((OnigCodePoint )0xdf, ss, 2, arg); +} + extern int -onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag, - const OnigCompAmbigCodes** ccs) +onigenc_apply_all_case_fold_with_map(int map_size, + const OnigPairCaseFoldCodes map[], + int ess_tsett_flag, OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) { - static const OnigCompAmbigCodes folds[] = { - { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } } - }; + OnigCodePoint code; + int i, r; + + r = onigenc_ascii_apply_all_case_fold(flag, f, arg); + if (r != 0) return r; - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = folds; - return sizeof(folds) / sizeof(OnigCompAmbigCodes); + for (i = 0; i < map_size; i++) { + code = map[i].to; + r = (*f)(map[i].from, &code, 1, arg); + if (r != 0) return r; + + code = map[i].from; + r = (*f)(map[i].to, &code, 1, arg); + if (r != 0) return r; } - else - return 0; + + if (ess_tsett_flag != 0) + return ss_apply_all_case_fold(flag, f, arg); + + return 0; } extern int -onigenc_not_support_get_ctype_code_range(int ctype, - const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) +onigenc_get_case_fold_codes_by_str_with_map(int map_size, + const OnigPairCaseFoldCodes map[], + int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { + /* SS */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { + /* ss */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (*p == 0xdf && ess_tsett_flag != 0) { + items[0].byte_len = 1; + items[0].code_len = 2; + items[0].code[0] = (OnigCodePoint )'s'; + items[0].code[1] = (OnigCodePoint )'s'; + + items[1].byte_len = 1; + items[1].code_len = 2; + items[1].code[0] = (OnigCodePoint )'S'; + items[1].code[1] = (OnigCodePoint )'S'; + + items[2].byte_len = 1; + items[2].code_len = 2; + items[2].code[0] = (OnigCodePoint )'s'; + items[2].code[1] = (OnigCodePoint )'S'; + + items[3].byte_len = 1; + items[3].code_len = 2; + items[3].code[0] = (OnigCodePoint )'S'; + items[3].code[1] = (OnigCodePoint )'s'; + + return 4; + } + else { + int i; + + for (i = 0; i < map_size; i++) { + if (*p == map[i].from) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].to; + return 1; + } + else if (*p == map[i].to) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].from; + return 1; + } + } + } + + return 0; +} + + +extern int +onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, + OnigCodePoint* sb_out ARG_UNUSED, + const OnigCodePoint* ranges[] ARG_UNUSED) { return ONIG_NO_SUPPORT_CONFIG; } @@ -609,57 +571,43 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) /* for single byte encodings */ extern int -onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end, - UChar* lower) +onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, + const UChar*end ARG_UNUSED, UChar* lower) { - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); - } - else { - *lower = **p; - } + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); (*p)++; return 1; /* return byte length of converted char to lower */ } +#if 0 extern int -onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag, +onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; (*pp)++; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); - } - else { - return FALSE; - } + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); } +#endif extern int -onigenc_single_byte_mbc_enc_len(const UChar* p) +onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED) { return 1; } extern OnigCodePoint -onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end) +onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) { return (OnigCodePoint )(*p); } extern int -onigenc_single_byte_code_to_mbclen(OnigCodePoint code) -{ - return 1; -} - -extern int -onigenc_single_byte_code_to_mbc_first(OnigCodePoint code) +onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED) { - return (code & 0xff); + return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE); } extern int @@ -670,19 +618,22 @@ onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf) } extern UChar* -onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s) +onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, + const UChar* s) { return (UChar* )s; } extern int -onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end) +onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, + const UChar* end ARG_UNUSED) { return TRUE; } extern int -onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end) +onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, + const UChar* end ARG_UNUSED) { return FALSE; } @@ -693,7 +644,7 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) int c, i, len; OnigCodePoint n; - len = enc_len(enc, p); + len = enclen(enc, p); n = (OnigCodePoint )(*p++); if (len == 1) return n; @@ -706,54 +657,46 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) } extern int -onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower) { int len; const UChar *p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); (*pp)++; return 1; } else { - len = enc_len(enc, p); - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } + int i; + + len = enclen(enc, p); + for (i = 0; i < len; i++) { + *lower++ = *p++; } (*pp) += len; return len; /* return byte length of converted to lower char */ } } +#if 0 extern int -onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag, +onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { (*pp)++; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); - } - else { - return FALSE; - } + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); } - (*pp) += enc_len(enc, p); + (*pp) += enclen(enc, p); return FALSE; } +#endif extern int onigenc_mb2_code_to_mbclen(OnigCodePoint code) @@ -771,40 +714,6 @@ onigenc_mb4_code_to_mbclen(OnigCodePoint code) else return 1; } -extern int -onigenc_mb2_code_to_mbc_first(OnigCodePoint code) -{ - int first; - - if ((code & 0xff00) != 0) { - first = (code >> 8) & 0xff; - } - else { - return (int )code; - } - return first; -} - -extern int -onigenc_mb4_code_to_mbc_first(OnigCodePoint code) -{ - int first; - - if ((code & 0xff000000) != 0) { - first = (code >> 24) & 0xff; - } - else if ((code & 0xff0000) != 0) { - first = (code >> 16) & 0xff; - } - else if ((code & 0xff00) != 0) { - first = (code >> 8) & 0xff; - } - else { - return (int )code; - } - return first; -} - extern int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) { @@ -816,8 +725,8 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(enc, buf) != (p - buf)) - return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(enc, buf) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; #endif return p - buf; } @@ -839,12 +748,46 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(enc, buf) != (p - buf)) - return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(enc, buf) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; #endif return p - buf; } +extern int +onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + static PosixBracketEntryType PBS[] = { + { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 }, + { (UChar* )NULL, -1, 0 } + }; + + PosixBracketEntryType *pb; + int len; + + len = onigenc_strlen(enc, p, end); + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (len == pb->len && + onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) + return pb->ctype; + } + + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +} + extern int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype) @@ -852,8 +795,7 @@ onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, if (code < 128) return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); else { - if ((ctype & (ONIGENC_CTYPE_WORD | - ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); } } @@ -868,8 +810,7 @@ onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, if (code < 128) return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); else { - if ((ctype & (ONIGENC_CTYPE_WORD | - ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); } } @@ -891,138 +832,71 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, if (x) return x; sascii++; - p += enc_len(enc, p); + p += enclen(enc, p); } return 0; } -#else /* ONIG_RUBY_M17N */ - -extern int -onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype) +/* Property management */ +static int +resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) { - switch (ctype) { - case ONIGENC_CTYPE_NEWLINE: - if (code == 0x0a) return 1; - break; - - case ONIGENC_CTYPE_ALPHA: - return m17n_isalpha(enc, code); - break; - case ONIGENC_CTYPE_BLANK: - return ONIGENC_IS_CODE_BLANK(enc, (int )(code)); - break; - case ONIGENC_CTYPE_CNTRL: - return m17n_iscntrl(enc, code); - break; - case ONIGENC_CTYPE_DIGIT: - return m17n_isdigit(enc, code); - break; - case ONIGENC_CTYPE_GRAPH: - return ONIGENC_IS_CODE_GRAPH(enc, (int )(code)); - break; - case ONIGENC_CTYPE_LOWER: - return m17n_islower(enc, code); - break; - case ONIGENC_CTYPE_PRINT: - return m17n_isprint(enc, code); - break; - case ONIGENC_CTYPE_PUNCT: - return m17n_ispunct(enc, code); - break; - case ONIGENC_CTYPE_SPACE: - return m17n_isspace(enc, code); - break; - case ONIGENC_CTYPE_UPPER: - return m17n_isupper(enc, code); - break; - case ONIGENC_CTYPE_XDIGIT: - return m17n_isxdigit(enc, code); - break; - case ONIGENC_CTYPE_WORD: - return m17n_iswchar(enc, code); - break; - case ONIGENC_CTYPE_ASCII: - return (code < 128 ? TRUE : FALSE); - break; - case ONIGENC_CTYPE_ALNUM: - return m17n_isalnum(enc, code); - break; - default: - break; + int size; + const OnigCodePoint **list = *plist; + + size = sizeof(OnigCodePoint*) * new_size; + if (IS_NULL(list)) { + list = (const OnigCodePoint** )xmalloc(size); + } + else { + list = (const OnigCodePoint** )xrealloc((void* )list, size); } - return 0; -} + if (IS_NULL(list)) return ONIGERR_MEMORY; -extern int -onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) -{ - int c, len; + *plist = list; + *psize = new_size; - m17n_mbcput(enc, code, buf); - c = m17n_firstbyte(enc, code); - len = enc_len(enc, c); - return len; + return 0; } extern int -onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf) +onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, + hash_table_type **table, const OnigCodePoint*** plist, int *pnum, + int *psize) { - unsigned int c, low; - - c = m17n_codepoint(enc, p, p + enc_len(enc, *p)); - low = m17n_tolower(enc, c); - m17n_mbcput(enc, low, buf); +#define PROP_INIT_SIZE 16 - return m17n_codelen(enc, low); -} + int r; -extern int -onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag, - UChar** pp, UChar* end) -{ - int len; - unsigned int c; - UChar* p = *pp; + if (*psize <= *pnum) { + int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2); + r = resize_property_list(new_size, plist, psize); + if (r != 0) return r; + } - len = enc_len(enc, *p); - (*pp) += len; - c = m17n_codepoint(enc, p, p + len); + (*plist)[*pnum] = prop; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - if (m17n_isupper(enc, c) || m17n_islower(enc, c)) - return TRUE; + if (ONIG_IS_NULL(*table)) { + *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); + if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; } - return FALSE; + *pnum = *pnum + 1; + onig_st_insert_strend(*table, name, name + strlen((char* )name), + (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); + return 0; } -extern UChar* -onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s) +extern int +onigenc_property_list_init(int (*f)(void)) { - UChar *p; - int len; + int r; - if (s <= start) return s; - p = s; + THREAD_ATOMIC_START; - while (!m17n_islead(enc, *p) && p > start) p--; - while (p + (len = enc_len(enc, *p)) < s) { - p += len; - } - if (p + len == s) return s; - return p; -} + r = f(); -extern int -onigenc_is_allowed_reverse_match(OnigEncoding enc, - const UChar* s, const UChar* end) -{ - return ONIGENC_IS_SINGLEBYTE(enc); + THREAD_ATOMIC_END; + return r; } - -extern void -onigenc_set_default_caseconv_table(UChar* table) { } - -#endif /* ONIG_RUBY_M17N */ diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h index 58ee3e7f22..40963280dc 100644 --- a/ext/mbstring/oniguruma/regenc.h +++ b/ext/mbstring/oniguruma/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,11 +29,23 @@ * SUCH DAMAGE. */ -#ifndef RUBY_PLATFORM +#ifndef PACKAGE +/* PACKAGE is defined in config.h */ #include "config.h" #endif + +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif + #include "oniguruma.h" +typedef struct { + OnigCodePoint from; + OnigCodePoint to; +} OnigPairCaseFoldCodes; + + #ifndef NULL #define NULL ((void* )0) #endif @@ -46,45 +58,73 @@ #define FALSE 0 #endif -/* error codes */ -#define ONIGENCERR_MEMORY -5 -#define ONIGENCERR_TYPE_BUG -6 -#define ONIGENCERR_INVALID_WIDE_CHAR_VALUE -400 -#define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE -401 +#ifndef ARG_UNUSED +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif +#endif #define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) #define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) #define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL #define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) - -#ifdef ONIG_RUBY_M17N - -#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF - -#else /* ONIG_RUBY_M17N */ - -#define USE_UNICODE_FULL_RANGE_CTYPE -/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */ +#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) + +/* character types bit flag */ +#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE) +#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA) +#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK) +#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL) +#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT) +#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH) +#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER) +#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT) +#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT) +#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE) +#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER) +#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT) +#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD) +#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM) +#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII) + +#define CTYPE_TO_BIT(ctype) (1<<(ctype)) +#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \ + ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\ + (ctype) == ONIGENC_CTYPE_PRINT) + + +typedef struct { + UChar *name; + int ctype; + short int len; +} PosixBracketEntryType; + + +/* #define USE_CRNL_AS_LINE_TERMINATOR */ +#define USE_UNICODE_PROPERTIES +/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ + #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII /* for encoding system implementation (internal) */ -ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); -ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); -ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); -ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); +ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); + /* methods for single byte encoding */ -ONIG_EXTERN int onigenc_ascii_mbc_to_normalize P_((OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_ascii_is_mbc_ambiguous P_((OnigAmbigType flag, const UChar** p, const UChar* end)); +ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code)); ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); @@ -92,37 +132,36 @@ ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s /* methods for multi byte encoding */ ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_mbn_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_mbn_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end)); +ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code)); ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code)); ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes)); /* in enc/unicode.c */ ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); +ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); +ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[])); +ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); +ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); + +#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) +#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ OnigEncISO_8859_1_ToLowerCaseTable[c] #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ OnigEncISO_8859_1_ToUpperCaseTable[c] -#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ - ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0) ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; -ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[]; -ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[]; - -#endif /* is not ONIG_RUBY_M17N */ ONIG_EXTERN int onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); @@ -133,15 +172,18 @@ onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable; +ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; +#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80) #define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] #define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ - ((OnigEncAsciiCtypeTable[code] & ctype) != 0) + ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) #define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ - ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) + (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\ + ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) + #endif /* REGENC_H */ diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c index d6ec91856d..385e560d98 100644 --- a/ext/mbstring/oniguruma/regerror.c +++ b/ext/mbstring/oniguruma/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -85,9 +85,9 @@ onig_error_code_to_format(int code) case ONIGERR_END_PATTERN_AT_CONTROL: p = "end pattern at control"; break; case ONIGERR_META_CODE_SYNTAX: - p = "illegal meta-code syntax"; break; + p = "invalid meta-code syntax"; break; case ONIGERR_CONTROL_CODE_SYNTAX: - p = "illegal control-code syntax"; break; + p = "invalid control-code syntax"; break; case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: p = "char-class value at end of range"; break; case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: @@ -142,8 +142,8 @@ onig_error_code_to_format(int code) p = "too big wide-char value"; break; case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: p = "too long wide-char value"; break; - case ONIGERR_INVALID_WIDE_CHAR_VALUE: - p = "invalid wide-char value"; break; + case ONIGERR_INVALID_CODE_POINT_VALUE: + p = "invalid code point value"; break; case ONIGERR_EMPTY_GROUP_NAME: p = "group name is empty"; break; case ONIGERR_INVALID_GROUP_NAME: @@ -182,6 +182,15 @@ onig_error_code_to_format(int code) return (UChar* )p; } +static void sprint_byte(char* s, unsigned int v) +{ + sprintf(s, "%02x", (v & 0377)); +} + +static void sprint_byte_with_x(char* s, unsigned int v) +{ + sprintf(s, "\\x%02x", (v & 0377)); +} static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, UChar buf[], int buf_size, int *is_over) @@ -196,10 +205,17 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, while (p < end) { code = ONIGENC_MBC_TO_CODE(enc, p, end); if (code >= 0x80) { - if (len + 5 <= buf_size) { - sprintf((char* )(&(buf[len])), "\\%03o", - (unsigned int)(code & 0377)); - len += 5; + if (code > 0xffff && len + 10 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); + sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); + len += 10; + } + else if (len + 6 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); + len += 6; } else { break; @@ -209,7 +225,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, buf[len++] = (UChar )code; } - p += enc_len(enc, p); + p += enclen(enc, p); if (len >= buf_size) break; } @@ -317,7 +333,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) va_list args; va_init_list(args, fmt); - n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args); + n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); va_end(args); need = (pat_end - pat) * 4 + 4; @@ -328,17 +344,17 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) p = pat; while (p < pat_end) { - if (*p == MC_ESC(enc)) { + if (*p == '\\') { *s++ = *p++; - len = enc_len(enc, p); + len = enclen(enc, p); while (len-- > 0) *s++ = *p++; } else if (*p == '/') { - *s++ = (unsigned char )MC_ESC(enc); + *s++ = (unsigned char )'\\'; *s++ = *p++; } else if (ONIGENC_IS_MBC_HEAD(enc, p)) { - len = enc_len(enc, p); + len = enclen(enc, p); if (ONIGENC_MBC_MINLEN(enc) == 1) { while (len-- > 0) *s++ = *p++; } @@ -346,7 +362,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) int blen; while (len-- > 0) { - sprintf((char* )bs, "\\%03o", *p++ & 0377); + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; while (blen-- > 0) *s++ = *bp++; @@ -355,7 +371,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) } else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && !ONIGENC_IS_CODE_SPACE(enc, *p)) { - sprintf((char* )bs, "\\%03o", *p++ & 0377); + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; while (len-- > 0) *s++ = *bp++; diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c index 918aa67aa8..7430d78514 100644 --- a/ext/mbstring/oniguruma/regexec.c +++ b/ext/mbstring/oniguruma/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,10 +29,12 @@ #include "regint.h" +#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + #ifdef USE_CRNL_AS_LINE_TERMINATOR #define ONIGENC_IS_MBC_CRNL(enc,p,end) \ (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ - ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end)) + ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) #endif #ifdef USE_CAPTURE_HISTORY @@ -111,7 +113,7 @@ history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) (OnigCaptureTreeNode** )xrealloc(parent->childs, sizeof(OnigCaptureTreeNode*) * n); } - CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(parent->childs); for (i = parent->allocated; i < n; i++) { parent->childs[i] = (OnigCaptureTreeNode* )0; } @@ -196,7 +198,7 @@ onig_region_resize(OnigRegion* region, int n) return 0; } -extern int +static int onig_region_resize_clear(OnigRegion* region, int n) { int r; @@ -297,47 +299,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) /** stack **/ #define INVALID_STACK_INDEX -1 -typedef long StackIndex; - -typedef struct _StackType { - unsigned int type; - union { - struct { - UChar *pcode; /* byte code position */ - UChar *pstr; /* string position */ - UChar *pstr_prev; /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - unsigned int state_check; -#endif - } state; - struct { - int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ - UChar *pcode; /* byte code position (head of repeated target) */ - int num; /* repeat id */ - } repeat; - struct { - StackIndex si; /* index of stack */ - } repeat_inc; - struct { - int num; /* memory num */ - UChar *pstr; /* start/end position */ - /* Following information is setted, if this stack type is MEM-START */ - StackIndex start; /* prev. info (for backtrack "(...)*" ) */ - StackIndex end; /* prev. info (for backtrack "(...)*" ) */ - } mem; - struct { - int num; /* null check id */ - UChar *pstr; /* start position */ - } null_check; -#ifdef USE_SUBEXP_CALL - struct { - UChar *ret_addr; /* byte code position */ - int num; /* null check id */ - UChar *pstr; /* string position */ - } call_frame; -#endif - } u; -} StackType; /* stack type */ /* used by normal-POP */ @@ -365,22 +326,6 @@ typedef struct _StackType { #define STK_MASK_TO_VOID_TARGET 0x10ff #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ -typedef struct { - void* stack_p; - int stack_n; - OnigOptionType options; - OnigRegion* region; - const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - int best_len; /* for ONIG_OPTION_FIND_LONGEST */ - UChar* best_s; -#endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - void* state_check_buff; - int state_check_buff_size; -#endif -} MatchArg; - #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ (msa).stack_p = (void* )0;\ @@ -388,14 +333,14 @@ typedef struct { (msa).region = (arg_region);\ (msa).start = (arg_start);\ (msa).best_len = ONIG_MISMATCH;\ -} while (0) +} while(0) #else #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ -} while (0) +} while(0) #endif #ifdef USE_COMBINATION_EXPLOSION_CHECK @@ -424,14 +369,14 @@ typedef struct { (msa).state_check_buff = (void* )0;\ (msa).state_check_buff_size = 0;\ }\ -} while (0) + } while(0) #define MATCH_ARG_FREE(msa) do {\ if ((msa).stack_p) xfree((msa).stack_p);\ if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ if ((msa).state_check_buff) xfree((msa).state_check_buff);\ }\ -} while (0); +} while(0) #else #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) @@ -442,15 +387,15 @@ typedef struct { #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ if (msa->stack_p) {\ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ - stk_alloc = (StackType* )(msa->stack_p);\ + stk_alloc = (OnigStackType* )(msa->stack_p);\ stk_base = stk_alloc;\ stk = stk_base;\ stk_end = stk_base + msa->stack_n;\ }\ else {\ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ - + sizeof(StackType) * (stack_num));\ - stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ + + sizeof(OnigStackType) * (stack_num));\ + stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ stk_base = stk_alloc;\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ @@ -480,11 +425,11 @@ onig_set_match_stack_limit_size(unsigned int size) } static int -stack_double(StackType** arg_stk_base, StackType** arg_stk_end, - StackType** arg_stk, StackType* stk_alloc, MatchArg* msa) +stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, + OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) { unsigned int n; - StackType *x, *stk_base, *stk_end, *stk; + OnigStackType *x, *stk_base, *stk_end, *stk; stk_base = *arg_stk_base; stk_end = *arg_stk_end; @@ -492,12 +437,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, n = stk_end - stk_base; if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { - x = (StackType* )xmalloc(sizeof(StackType) * n * 2); + x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); if (IS_NULL(x)) { STACK_SAVE; return ONIGERR_MEMORY; } - xmemcpy(x, stk_base, n * sizeof(StackType)); + xmemcpy(x, stk_base, n * sizeof(OnigStackType)); n *= 2; } else { @@ -508,7 +453,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, else n = MatchStackLimitSize; } - x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n); + x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); if (IS_NULL(x)) { STACK_SAVE; return ONIGERR_MEMORY; @@ -680,7 +625,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, level--;\ }\ }\ -} while (0) +} while(0) #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ int level = 0;\ @@ -698,7 +643,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, }\ k++;\ }\ -} while (0) +} while(0) #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ @@ -844,7 +789,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } while(0) #define STACK_STOP_BT_END do {\ - StackType *k = stk;\ + OnigStackType *k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ @@ -859,7 +804,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } while(0) #define STACK_NULL_CHECK(isnull,id,s) do {\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ @@ -874,7 +819,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_NULL_CHECK_REC(isnull,id,s) do {\ int level = 0;\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ @@ -894,7 +839,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } while(0) #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ @@ -934,7 +879,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ int level = 0;\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ @@ -996,11 +941,11 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, else if (k->type == STK_CALL_FRAME) level--;\ else if (k->type == STK_RETURN) level++;\ }\ -} while (0) +} while(0) #define STACK_RETURN(addr) do {\ int level = 0;\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_RETURN"); \ @@ -1023,25 +968,25 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, }\ } while(0) -#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\ - if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ +#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ goto fail; \ } while(0) -static int string_cmp_ic(OnigEncoding enc, int ambig_flag, +static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, UChar* s1, UChar** ps2, int mblen) { - UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN]; - UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN]; - UChar *p1, *p2, *end, *s2, *end2; + UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *p1, *p2, *end1, *s2, *end2; int len1, len2; s2 = *ps2; - end = s1 + mblen; + end1 = s1 + mblen; end2 = s2 + mblen; - while (s1 < end) { - len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1); - len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2); + while (s1 < end1) { + len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1); + len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2); if (len1 != len2) return 0; p1 = buf1; p2 = buf2; @@ -1065,31 +1010,36 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag, }\ } while(0) -#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\ - if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ +#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ is_fail = 1; \ else \ is_fail = 0; \ } while(0) -#define ON_STR_BEGIN(s) ((s) == str) -#define ON_STR_END(s) ((s) == end) -#define IS_EMPTY_STR (str == end) - -#define DATA_ENSURE(n) \ - if (s + (n) > end) goto fail - +#define IS_EMPTY_STR (str == end) +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define DATA_ENSURE_CHECK1 (s < right_range) +#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) +#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail +#else +#define DATA_ENSURE_CHECK1 (s < end) #define DATA_ENSURE_CHECK(n) (s + (n) <= end) +#define DATA_ENSURE(n) if (s + (n) > end) goto fail +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + #ifdef USE_CAPTURE_HISTORY static int -make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, - StackType* stk_top, UChar* str, regex_t* reg) +make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, + OnigStackType* stk_top, UChar* str, regex_t* reg) { int n, r; OnigCaptureTreeNode* child; - StackType* k = *kp; + OnigStackType* k = *kp; while (k < stk_top) { if (k->type == STK_MEM_START) { @@ -1097,7 +1047,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && BIT_STATUS_AT(reg->capture_history, n) != 0) { child = history_node_new(); - CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(child); child->group = n; child->beg = (int )(k->u.mem.pstr - str); r = history_tree_add_child(node, child); @@ -1124,7 +1074,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, } #endif -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL static int mem_is_in_memp(int mem, int num, UChar* memp) { int i; @@ -1138,13 +1088,13 @@ static int mem_is_in_memp(int mem, int num, UChar* memp) } static int backref_match_at_nested_level(regex_t* reg - , StackType* top, StackType* stk_base - , int ignore_case, int ambig_flag + , OnigStackType* top, OnigStackType* stk_base + , int ignore_case, int case_fold_flag , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) { UChar *ss, *p, *pstart, *pend = NULL_UCHARP; int level; - StackType* k; + OnigStackType* k; level = 0; k = top; @@ -1166,7 +1116,7 @@ static int backref_match_at_nested_level(regex_t* reg ss = *s; if (ignore_case != 0) { - if (string_cmp_ic(reg->enc, ambig_flag, + if (string_cmp_ic(reg->enc, case_fold_flag, pstart, &ss, (int )(pend - pstart)) == 0) return 0; /* or goto next_mem; */ } @@ -1192,70 +1142,8 @@ static int backref_match_at_nested_level(regex_t* reg return 0; } -#endif /* USE_BACKREF_AT_LEVEL */ - - -#ifdef RUBY_PLATFORM - -typedef struct { - int state; - regex_t* reg; - MatchArg* msa; - StackType* stk_base; -} TrapEnsureArg; - -static VALUE -trap_ensure(VALUE arg) -{ - TrapEnsureArg* ta = (TrapEnsureArg* )arg; - - if (ta->state == 0) { /* trap_exec() is not normal return */ - ONIG_STATE_DEC_THREAD(ta->reg); - if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) - xfree(ta->stk_base); - - MATCH_ARG_FREE(*(ta->msa)); - } - - return Qnil; -} +#endif /* USE_BACKREF_WITH_LEVEL */ -static VALUE -trap_exec(VALUE arg) -{ - TrapEnsureArg* ta; - - rb_trap_exec(); - - ta = (TrapEnsureArg* )arg; - ta->state = 1; /* normal return */ - return Qnil; -} - -extern void -onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base) -{ - VALUE arg; - TrapEnsureArg ta; - - ta.state = 0; - ta.reg = reg; - ta.msa = msa; - ta.stk_base = stk_base; - arg = (VALUE )(&ta); - rb_ensure(trap_exec, arg, trap_ensure, arg); -} - -#define CHECK_INTERRUPT_IN_MATCH_AT do {\ - if (rb_trap_pending) {\ - if (! rb_prohibit_interrupt) {\ - onig_exec_trap(reg, msa, stk_base);\ - }\ - }\ -} while (0) -#else -#define CHECK_INTERRUPT_IN_MATCH_AT -#endif /* RUBY_PLATFORM */ #ifdef ONIG_DEBUG_STATISTICS @@ -1288,41 +1176,26 @@ static int OpCurr = OP_FINISH; static int OpPrevTarget = OP_FAIL; static int MaxStackDepth = 0; -#define STAT_OP_IN(opcode) do {\ +#define MOP_IN(opcode) do {\ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ OpCurr = opcode;\ OpCounter[opcode]++;\ GETTIME(ts);\ -} while (0) +} while(0) -#define STAT_OP_OUT do {\ +#define MOP_OUT do {\ GETTIME(te);\ OpTime[OpCurr] += TIMEDIFF(te, ts);\ -} while (0) - -#ifdef RUBY_PLATFORM - -/* - * :nodoc: - */ -static VALUE onig_stat_print(void) -{ - onig_print_statistics(stderr); - return Qnil; -} -#endif +} while(0) -extern void onig_statistics_init(void) +extern void +onig_statistics_init(void) { int i; for (i = 0; i < 256; i++) { OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; } MaxStackDepth = 0; - -#ifdef RUBY_PLATFORM - rb_define_global_function("onig_stat_print", onig_stat_print, 0); -#endif } extern void @@ -1341,73 +1214,15 @@ onig_print_statistics(FILE* f) stk++;\ if (stk - stk_base > MaxStackDepth) \ MaxStackDepth = stk - stk_base;\ -} while (0) +} while(0) #else #define STACK_INC stk++ -#define STAT_OP_IN(opcode) -#define STAT_OP_OUT +#define MOP_IN(opcode) +#define MOP_OUT #endif -extern int -onig_is_in_code_range(const UChar* p, OnigCodePoint code) -{ - OnigCodePoint n, *data; - OnigCodePoint low, high, x; - - GET_CODE_POINT(n, p); - data = (OnigCodePoint* )p; - data++; - - for (low = 0, high = n; low < high; ) { - x = (low + high) >> 1; - if (code > data[x * 2 + 1]) - low = x + 1; - else - high = x; - } - - return ((low < n && code >= data[low * 2]) ? 1 : 0); -} - -static int -is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc) -{ - int found; - - if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) { - if (IS_NULL(cc->mbuf)) { - found = 0; - } - else { - found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); - } - } - else { - found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); - } - - if (IS_CCLASS_NOT(cc)) - return !found; - else - return found; -} - -extern int -onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) -{ - int len; - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - len = 2; - } - else { - len = ONIGENC_CODE_TO_MBCLEN(enc, code); - } - return is_code_in_cc(len, code, cc); -} - /* matching region of POSIX API */ typedef int regoff_t; @@ -1420,8 +1235,11 @@ typedef struct { /* match data(str - end) from position (sstart). */ /* if sstart == str then set sprev to NULL. */ static int -match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, - UChar* sprev, MatchArg* msa) +match_at(regex_t* reg, const UChar* str, const UChar* end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar* right_range, +#endif + const UChar* sstart, UChar* sprev, OnigMatchArg* msa) { static UChar FinishCode[] = { OP_FINISH }; @@ -1431,15 +1249,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, RelAddrType addr; OnigOptionType option = reg->options; OnigEncoding encode = reg->enc; - OnigAmbigType ambig_flag = reg->ambig_flag; + OnigCaseFoldType case_fold_flag = reg->case_fold_flag; UChar *s, *q, *sbegin; UChar *p = reg->p; char *alloca_base; - StackType *stk_alloc, *stk_base, *stk, *stk_end; - StackType *stkp; /* used as any purpose. */ - StackIndex si; - StackIndex *repeat_stk; - StackIndex *mem_start_stk, *mem_end_stk; + OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; + OnigStackType *stkp; /* used as any purpose. */ + OnigStackIndex si; + OnigStackIndex *repeat_stk; + OnigStackIndex *mem_start_stk, *mem_end_stk; #ifdef USE_COMBINATION_EXPLOSION_CHECK int scv; unsigned char* state_check_buff = msa->state_check_buff; @@ -1450,9 +1268,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); pop_level = reg->stack_pop_level; num_mem = reg->num_mem; - repeat_stk = (StackIndex* )alloca_base; + repeat_stk = (OnigStackIndex* )alloca_base; - mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat); + mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); mem_end_stk = mem_start_stk + num_mem; mem_start_stk--; /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */ @@ -1480,13 +1298,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, fprintf(stderr, "%4d> \"", (int )(s - str)); bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { - len = enc_len(encode, q); + len = enclen(encode, q); while (len-- > 0) *bp++ = *q++; } if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } else { xmemcpy(bp, "\"", 1); bp += 1; } *bp = 0; - fputs(buf, stderr); + fputs((char* )buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); onig_print_compiled_byte_code(stderr, p, NULL, encode); fprintf(stderr, "\n"); @@ -1495,7 +1313,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, sbegin = s; switch (*p++) { - case OP_END: STAT_OP_IN(OP_END); + case OP_END: MOP_IN(OP_END); n = s - sstart; if (n > best_len) { OnigRegion* region; @@ -1512,7 +1330,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, best_len = n; region = msa->region; if (region) { -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION if (IS_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; @@ -1535,7 +1353,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } } else { -#endif /* USE_POSIX_REGION_OPTION */ +#endif /* USE_POSIX_API_REGION_OPTION */ region->beg[0] = sstart - str; region->end[0] = s - str; for (i = 1; i <= num_mem; i++) { @@ -1561,7 +1379,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (IS_NULL(region->history_root)) { region->history_root = node = history_node_new(); - CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(node); } else { node = region->history_root; @@ -1581,7 +1399,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } } #endif /* USE_CAPTURE_HISTORY */ -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION } /* else IS_POSIX_REGION() */ #endif } /* if (region) */ @@ -1590,14 +1408,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE end_best_len: #endif - STAT_OP_OUT; + MOP_OUT; if (IS_FIND_CONDITION(option)) { if (IS_FIND_NOT_EMPTY(option) && s == sstart) { best_len = ONIG_MISMATCH; goto fail; /* for retry */ } - if (IS_FIND_LONGEST(option) && s < end) { + if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { goto fail; /* for retry */ } } @@ -1606,7 +1424,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, goto finish; break; - case OP_EXACT1: STAT_OP_IN(OP_EXACT1); + case OP_EXACT1: MOP_IN(OP_EXACT1); #if 0 DATA_ENSURE(1); if (*p != *s) goto fail; @@ -1615,19 +1433,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (*p != *s++) goto fail; DATA_ENSURE(0); p++; - STAT_OP_OUT; + MOP_OUT; break; - case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); + case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); { int len; - UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; DATA_ENSURE(1); - ss = s; - sp = p; - - len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { @@ -1637,21 +1455,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; q++; } } - STAT_OP_OUT; + MOP_OUT; break; - case OP_EXACT2: STAT_OP_IN(OP_EXACT2); + case OP_EXACT2: MOP_IN(OP_EXACT2); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACT3: STAT_OP_IN(OP_EXACT3); + case OP_EXACT3: MOP_IN(OP_EXACT3); DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; @@ -1660,11 +1478,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACT4: STAT_OP_IN(OP_EXACT4); + case OP_EXACT4: MOP_IN(OP_EXACT4); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1675,11 +1493,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACT5: STAT_OP_IN(OP_EXACT5); + case OP_EXACT5: MOP_IN(OP_EXACT5); DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; @@ -1692,25 +1510,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTN: STAT_OP_IN(OP_EXACTN); + case OP_EXACTN: MOP_IN(OP_EXACTN); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { if (*p++ != *s++) goto fail; } sprev = s - 1; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); + case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); { int len; - UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; GET_LENGTH_INC(tlen, p); endp = p + tlen; @@ -1718,35 +1536,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, while (p < endp) { sprev = s; DATA_ENSURE(1); - ss = s; - sp = p; - - len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { - if (*p != *q) { - goto fail; - } + if (*p != *q) goto fail; p++; q++; } } } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1); + case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; - STAT_OP_OUT; + MOP_OUT; break; - case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2); + case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1757,11 +1573,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; if (*p != *s) goto fail; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3); + case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; @@ -1776,11 +1592,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; if (*p != *s) goto fail; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N); + case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { @@ -1790,11 +1606,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; } sprev = s - 2; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N); + case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { @@ -1806,11 +1622,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; } sprev = s - 3; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN); + case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; @@ -1820,19 +1636,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; } sprev = s - tlen; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_CCLASS: STAT_OP_IN(OP_CCLASS); + case OP_CCLASS: MOP_IN(OP_CCLASS); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - STAT_OP_OUT; + s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ + MOP_OUT; break; - case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB); + case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: @@ -1843,7 +1659,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, int mb_len; DATA_ENSURE(1); - mb_len = enc_len(encode, s); + mb_len = enclen(encode, s); DATA_ENSURE(mb_len); ss = s; s += mb_len; @@ -1858,10 +1674,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, #endif } p += tlen; - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX); + case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1876,18 +1692,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p += tlen; s++; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT); + case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, s); - STAT_OP_OUT; + s += enclen(encode, s); + MOP_OUT; break; - case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT); + case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; @@ -1901,9 +1717,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, { OnigCodePoint code; UChar *ss; - int mb_len = enc_len(encode, s); + int mb_len = enclen(encode, s); - if (s + mb_len > end) { + if (! DATA_ENSURE_CHECK(mb_len)) { DATA_ENSURE(1); s = (UChar* )end; p += tlen; @@ -1925,10 +1741,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p += tlen; cc_mb_not_success: - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT); + case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1943,10 +1759,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p += tlen; s++; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE); + case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); { OnigCodePoint code; void *node; @@ -1955,49 +1771,49 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, DATA_ENSURE(1); GET_POINTER_INC(node, p); - mb_len = enc_len(encode, s); + mb_len = enclen(encode, s); ss = s; s += mb_len; DATA_ENSURE(0); code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (is_code_in_cc(mb_len, code, node) == 0) goto fail; + if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); + case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); DATA_ENSURE(1); - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML); + case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); s += n; - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR); - while (s < end) { + case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); + while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR); - while (s < end) { + case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); + while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, s); + n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -2008,31 +1824,31 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, s++; } } - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); - while (s < end) { + case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } p++; - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); - while (s < end) { + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, s); - if (n >1) { + n = enclen(encode, s); + if (n > 1) { DATA_ENSURE(n); sprev = s; s += n; @@ -2043,36 +1859,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } } p++; - STAT_OP_OUT; + MOP_OUT; break; #ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR); + case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); GET_STATE_CHECK_NUM_INC(mem, p); - while (s < end) { + while (DATA_ENSURE_CHECK1) { STATE_CHECK_VAL(scv, mem); if (scv) goto fail; STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } - STAT_OP_OUT; + MOP_OUT; break; case OP_STATE_CHECK_ANYCHAR_ML_STAR: - STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); + MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); GET_STATE_CHECK_NUM_INC(mem, p); - while (s < end) { + while (DATA_ENSURE_CHECK1) { STATE_CHECK_VAL(scv, mem); if (scv) goto fail; STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enc_len(encode, s); + n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -2083,29 +1899,29 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, s++; } } - STAT_OP_OUT; + MOP_OUT; break; #endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_WORD: STAT_OP_IN(OP_WORD); + case OP_WORD: MOP_IN(OP_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, s); - STAT_OP_OUT; + s += enclen(encode, s); + MOP_OUT; break; - case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD); + case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, s); - STAT_OP_OUT; + s += enclen(encode, s); + MOP_OUT; break; - case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND); + case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); if (ON_STR_BEGIN(s)) { DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) @@ -2120,13 +1936,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, == ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND); + case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; } else if (ON_STR_END(s)) { @@ -2138,25 +1954,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, != ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } - STAT_OP_OUT; + MOP_OUT; continue; break; #ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN); - if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) { + case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } } goto fail; break; - case OP_WORD_END: STAT_OP_IN(OP_WORD_END); + case OP_WORD_END: MOP_IN(OP_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } } @@ -2164,80 +1980,81 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, break; #endif - case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF); + case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); if (! ON_STR_BEGIN(s)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_END_BUF: STAT_OP_IN(OP_END_BUF); + case OP_END_BUF: MOP_IN(OP_END_BUF); if (! ON_STR_END(s)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE); + case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { if (IS_NOTBOL(msa->options)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - STAT_OP_OUT; + MOP_OUT; continue; } goto fail; break; - case OP_END_LINE: STAT_OP_IN(OP_END_LINE); + case OP_END_LINE: MOP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } #endif goto fail; break; - case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF); + case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */ - STAT_OP_OUT; + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && - ON_STR_END(s + enc_len(encode, s))) { - STAT_OP_OUT; + ON_STR_END(s + enclen(encode, s))) { + MOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - UChar* ss = s + enc_len(encode, s); - if (ON_STR_END(ss + enc_len(encode, ss))) { - STAT_OP_OUT; + UChar* ss = s + enclen(encode, s); + ss += enclen(encode, ss); + if (ON_STR_END(ss)) { + MOP_OUT; continue; } } @@ -2245,79 +2062,79 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, goto fail; break; - case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION); + case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); if (s != msa->start) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH); + case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START); + case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); GET_MEMNUM_INC(mem, p); - mem_start_stk[mem] = (StackIndex )((void* )s); - STAT_OP_OUT; + mem_start_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; continue; break; - case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH); + case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END); + case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (StackIndex )((void* )s); - STAT_OP_OUT; + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; continue; break; #ifdef USE_SUBEXP_CALL - case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC); + case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC); + case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (StackIndex )((void* )s); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); if (BIT_STATUS_AT(reg->bt_mem_start, mem)) mem_start_stk[mem] = GET_STACK_INDEX(stkp); else - mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); + mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); - STAT_OP_OUT; + MOP_OUT; continue; break; #endif - case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1); + case OP_BACKREF1: MOP_IN(OP_BACKREF1); mem = 1; goto backref; break; - case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2); + case OP_BACKREF2: MOP_IN(OP_BACKREF2); mem = 2; goto backref; break; - case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); + case OP_BACKREFN: MOP_IN(OP_BACKREFN); GET_MEMNUM_INC(mem, p); backref: { @@ -2342,15 +2159,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, DATA_ENSURE(n); sprev = s; STRING_CMP(pstart, s, n); - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - STAT_OP_OUT; + MOP_OUT; continue; } break; - case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC); + case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); GET_MEMNUM_INC(mem, p); { int len; @@ -2373,16 +2190,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, n = pend - pstart; DATA_ENSURE(n); sprev = s; - STRING_CMP_IC(ambig_flag, pstart, &s, n); - while (sprev + (len = enc_len(encode, sprev)) < s) + STRING_CMP_IC(case_fold_flag, pstart, &s, n); + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - STAT_OP_OUT; + MOP_OUT; continue; } break; - case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI); + case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2409,19 +2226,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STRING_CMP_VALUE(pstart, swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); break; /* success */ } if (i == tlen) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } break; - case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC); + case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2445,23 +2262,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, DATA_ENSURE(n); sprev = s; swork = s; - STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail); + STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); break; /* success */ } if (i == tlen) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } break; -#ifdef USE_BACKREF_AT_LEVEL - case OP_BACKREF_AT_LEVEL: +#ifdef USE_BACKREF_WITH_LEVEL + case OP_BACKREF_WITH_LEVEL: { int len; OnigOptionType ic; @@ -2472,9 +2289,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, GET_LENGTH_INC(tlen, p); sprev = s; - if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag - , (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enc_len(encode, sprev)) < s) + if (backref_match_at_nested_level(reg, stk, stk_base, ic + , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; p += (SIZE_MEMNUM * tlen); @@ -2482,35 +2299,37 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, else goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } break; #endif - - case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); + +#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ + case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); STACK_PUSH_ALT(p, s, sprev); p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION); + case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); GET_OPTION_INC(option, p); - STAT_OP_OUT; + MOP_OUT; continue; break; +#endif - case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START); + case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_PUSH_NULL_CHECK_START(mem, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END); + case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); { int isnull; @@ -2540,12 +2359,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } } } - STAT_OP_OUT; + MOP_OUT; continue; break; -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK - case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST); +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); { int isnull; @@ -2560,19 +2379,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, goto null_check_found; } } - STAT_OP_OUT; + MOP_OUT; continue; break; #endif #ifdef USE_SUBEXP_CALL case OP_NULL_CHECK_END_MEMST_PUSH: - STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); { int isnull; GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); #else STACK_NULL_CHECK_REC(isnull, mem, s); @@ -2589,39 +2408,39 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STACK_PUSH_NULL_CHECK_END(mem); } } - STAT_OP_OUT; + MOP_OUT; continue; break; #endif - case OP_JUMP: STAT_OP_IN(OP_JUMP); + case OP_JUMP: MOP_IN(OP_JUMP); GET_RELADDR_INC(addr, p); p += addr; - STAT_OP_OUT; + MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_PUSH: STAT_OP_IN(OP_PUSH); + case OP_PUSH: MOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; break; #ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH); + case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); GET_STATE_CHECK_NUM_INC(mem, p); STATE_CHECK_VAL(scv, mem); if (scv) goto fail; GET_RELADDR_INC(addr, p); STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); + case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); GET_STATE_CHECK_NUM_INC(mem, p); GET_RELADDR_INC(addr, p); STATE_CHECK_VAL(scv, mem); @@ -2631,54 +2450,54 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, else { STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK); + case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); GET_STATE_CHECK_NUM_INC(mem, p); STATE_CHECK_VAL(scv, mem); if (scv) goto fail; STACK_PUSH_STATE_CHECK(s, mem); - STAT_OP_OUT; + MOP_OUT; continue; break; #endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_POP: STAT_OP_IN(OP_POP); + case OP_POP: MOP_IN(OP_POP); STACK_POP_ONE; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1); + case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); GET_RELADDR_INC(addr, p); - if (*p == *s && DATA_ENSURE_CHECK(1)) { + if (*p == *s && DATA_ENSURE_CHECK1) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; } p += (addr + 1); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT); + case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); GET_RELADDR_INC(addr, p); if (*p == *s) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; } p++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_REPEAT: STAT_OP_IN(OP_REPEAT); + case OP_REPEAT: MOP_IN(OP_REPEAT); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2691,11 +2510,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STACK_PUSH_ALT(p + addr, s, sprev); } } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG); + case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2709,11 +2528,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p += addr; } } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC); + case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2731,19 +2550,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); - STAT_OP_OUT; + MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG); + case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; break; - case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG); + case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2765,68 +2584,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { STACK_PUSH_REPEAT_INC(si); } - STAT_OP_OUT; + MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG); + case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; break; - case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS); + case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); STACK_PUSH_POS(s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_POP_POS: STAT_OP_IN(OP_POP_POS); + case OP_POP_POS: MOP_IN(OP_POP_POS); { STACK_POS_END(stkp); s = stkp->u.state.pstr; sprev = stkp->u.state.pstr_prev; } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT); + case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); GET_RELADDR_INC(addr, p); STACK_PUSH_POS_NOT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS); + case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); STACK_POP_TIL_POS_NOT; goto fail; break; - case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT); + case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); STACK_PUSH_STOP_BT; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT); + case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); STACK_STOP_BT_END; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND); + case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT); + case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -2841,28 +2660,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, s = q; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT); + case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); STACK_POP_TIL_LOOK_BEHIND_NOT; goto fail; break; #ifdef USE_SUBEXP_CALL - case OP_CALL: STAT_OP_IN(OP_CALL); + case OP_CALL: MOP_IN(OP_CALL); GET_ABSADDR_INC(addr, p); STACK_PUSH_CALL_FRAME(p); p = reg->p + addr; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_RETURN: STAT_OP_IN(OP_RETURN); + case OP_RETURN: MOP_IN(OP_RETURN); STACK_RETURN(p); STACK_PUSH_RETURN; - STAT_OP_OUT; + MOP_OUT; continue; break; #endif @@ -2872,9 +2691,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, break; fail: - STAT_OP_OUT; + MOP_OUT; /* fall */ - case OP_FAIL: STAT_OP_IN(OP_FAIL); + case OP_FAIL: MOP_IN(OP_FAIL); STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; @@ -2887,7 +2706,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } #endif - STAT_OP_OUT; + MOP_OUT; continue; break; @@ -2943,32 +2762,25 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, if (t == target_end) return s; } - s += enc_len(enc, s); + s += enclen(enc, s); } return (UChar* )NULL; } static int -str_lower_case_match(OnigEncoding enc, int ambig_flag, +str_lower_case_match(OnigEncoding enc, int case_fold_flag, const UChar* t, const UChar* tend, const UChar* p, const UChar* end) { int lowlen; - UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; - const UChar* tsave; - const UChar* psave; - - tsave = t; - psave = p; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; while (t < tend) { - lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf); + lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); q = lowbuf; while (lowlen > 0) { - if (*t++ != *q++) { - return 0; - } + if (*t++ != *q++) return 0; lowlen--; } } @@ -2977,7 +2789,7 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag, } static UChar* -slow_search_ic(OnigEncoding enc, int ambig_flag, +slow_search_ic(OnigEncoding enc, int case_fold_flag, UChar* target, UChar* target_end, const UChar* text, const UChar* text_end, UChar* text_range) { @@ -2991,10 +2803,11 @@ slow_search_ic(OnigEncoding enc, int ambig_flag, s = (UChar* )text; while (s < end) { - if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end)) + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, text_end)) return s; - s += enc_len(enc, s); + s += enclen(enc, s); } return (UChar* )NULL; @@ -3033,7 +2846,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, } static UChar* -slow_search_backward_ic(OnigEncoding enc, int ambig_flag, +slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, UChar* target, UChar* target_end, const UChar* text, const UChar* adjust_text, const UChar* text_end, const UChar* text_start) @@ -3048,7 +2861,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag, s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); while (s >= text) { - if (str_lower_case_match(enc, ambig_flag, + if (str_lower_case_match(enc, case_fold_flag, target, target_end, s, text_end)) return s; @@ -3084,15 +2897,14 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = se = s + tlen1; t = tail; - while (t >= target && *p == *t) { - p--; t--; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } - if (t < target) return (UChar* )s; - skip = reg->map[*se]; t = s; do { - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } while ((s - t) < skip && s < end); } } @@ -3100,15 +2912,14 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = se = s + tlen1; t = tail; - while (t >= target && *p == *t) { - p--; t--; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } - if (t < target) return (UChar* )s; - skip = reg->int_map[*se]; t = s; do { - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } while ((s - t) < skip && s < end); } } @@ -3133,10 +2944,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = s; t = tail; - while (t >= target && *p == *t) { + while (*p == *t) { + if (t == target) return (UChar* )p; p--; t--; } - if (t < target) return (UChar* )(p + 1); s += reg->map[*s]; } } @@ -3144,10 +2955,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = s; t = tail; - while (t >= target && *p == *t) { + while (*p == *t) { + if (t == target) return (UChar* )p; p--; t--; } - if (t < target) return (UChar* )(p + 1); s += reg->int_map[*s]; } } @@ -3155,7 +2966,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, } static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip) +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + int** skip) { int i, len; @@ -3213,7 +3025,7 @@ map_search(OnigEncoding enc, UChar map[], while (s < text_range) { if (map[*s]) return (UChar* )s; - s += enc_len(enc, s); + s += enclen(enc, s); } return (UChar* )NULL; } @@ -3239,7 +3051,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On { int r; UChar *prev; - MatchArg msa; + OnigMatchArg msa; #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) start: @@ -3275,7 +3087,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On #endif if (region -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { @@ -3286,7 +3098,11 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On if (r == 0) { prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, at, prev, &msa); + r = match_at(reg, str, end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + end, +#endif + at, prev, &msa); } MATCH_ARG_FREE(msa); @@ -3312,7 +3128,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { UChar *q = p + reg->dmin; - while (p < q) p += enc_len(reg->enc, p); + while (p < q) p += enclen(reg->enc, p); } } @@ -3322,7 +3138,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_ic(reg->enc, reg->ambig_flag, + p = slow_search_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, p, end, range); break; @@ -3343,7 +3159,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, if (p - reg->dmin < s) { retry_gate: pprev = p; - p += enc_len(reg->enc, p); + p += enclen(reg->enc, p); goto retry; } @@ -3362,10 +3178,12 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, case ANCHOR_END_LINE: if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = (UChar* )onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; +#endif } else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -3443,7 +3261,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, break; case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_backward_ic(reg->enc, reg->ambig_flag, + p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, range, adjrange, end, p); break; @@ -3484,12 +3302,14 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ANCHOR_END_LINE: if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(prev)) goto fail; if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { p = prev; goto retry; } +#endif } else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -3532,8 +3352,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, { int r; UChar *s, *prev; - MatchArg msa; + OnigMatchArg msa; const UChar *orig_start = start; +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar *orig_range = range; +#endif #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) start: @@ -3567,7 +3390,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, #endif if (region -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { @@ -3577,8 +3400,32 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (start > end || start < str) goto mismatch_no_msa; + +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (! IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +#else +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#else #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK \ +#define MATCH_AND_RETURN_CHECK(none) \ r = match_at(reg, str, end, s, prev, &msa);\ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ @@ -3589,7 +3436,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else goto finish; /* error */ \ } #else -#define MATCH_AND_RETURN_CHECK \ +#define MATCH_AND_RETURN_CHECK(none) \ r = match_at(reg, str, end, s, prev, &msa);\ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ @@ -3597,7 +3444,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, }\ else goto finish; /* error */ \ } -#endif +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + /* anchor optimize: resume search range */ if (reg->anchor != 0 && str < end) { @@ -3700,10 +3549,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, MATCH_ARG_INIT(msa, option, region, start); #ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; - msa.state_check_buff_size = 0; + msa.state_check_buff = (void* )0; + msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ #endif - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(end); goto mismatch; } goto mismatch_no_msa; @@ -3754,9 +3603,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, prev = low_prev; } while (s <= high) { - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } } while (s < range); goto mismatch; @@ -3767,13 +3616,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { do { - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { prev = s; - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } } while (s < range); goto mismatch; @@ -3782,16 +3631,21 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } do { - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } while (s < range); if (s == range) { /* because empty match with /$/. */ - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_range); } } else { /* backward search */ +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + if (orig_start < end) + orig_start += enclen(reg->enc, orig_start); /* is upper range */ +#endif + if (reg->optimize != ONIG_OPTIMIZE_NONE) { UChar *low, *high, *adjrange, *sch_start; @@ -3814,7 +3668,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, while (s >= low) { prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_start); s = prev; } } while (s >= range); @@ -3842,7 +3696,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, do { prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_start); s = prev; } while (s >= range); } @@ -3865,7 +3719,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). */ if (IS_FIND_NOT_EMPTY(reg->options) && region -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { @@ -3906,10 +3760,10 @@ onig_get_options(regex_t* reg) return reg->options; } -extern OnigAmbigType -onig_get_ambig_flag(regex_t* reg) +extern OnigCaseFoldType +onig_get_case_fold_flag(regex_t* reg) { - return reg->ambig_flag; + return reg->case_fold_flag; } extern OnigSyntaxType* diff --git a/ext/mbstring/oniguruma/regext.c b/ext/mbstring/oniguruma/regext.c index f5ad1f35a2..b1b957b40c 100755 --- a/ext/mbstring/oniguruma/regext.c +++ b/ext/mbstring/oniguruma/regext.c @@ -2,7 +2,7 @@ regext.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -100,7 +100,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e if (to == ONIG_ENCODING_UTF16_BE) { if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { *conv = (UChar* )xmalloc(len * 2); - CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*conv); *conv_end = *conv + (len * 2); conv_ext0be(s, end, *conv); return 0; @@ -108,7 +108,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e else if (from == ONIG_ENCODING_UTF16_LE) { swap16: *conv = (UChar* )xmalloc(len); - CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*conv); *conv_end = *conv + len; conv_swap2bytes(s, end, *conv); return 0; @@ -117,7 +117,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e else if (to == ONIG_ENCODING_UTF16_LE) { if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { *conv = (UChar* )xmalloc(len * 2); - CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*conv); *conv_end = *conv + (len * 2); conv_ext0le(s, end, *conv); return 0; @@ -129,7 +129,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e if (to == ONIG_ENCODING_UTF32_BE) { if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { *conv = (UChar* )xmalloc(len * 4); - CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*conv); *conv_end = *conv + (len * 4); conv_ext0be32(s, end, *conv); return 0; @@ -137,7 +137,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e else if (from == ONIG_ENCODING_UTF32_LE) { swap32: *conv = (UChar* )xmalloc(len); - CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*conv); *conv_end = *conv + len; conv_swap4bytes(s, end, *conv); return 0; @@ -146,7 +146,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e else if (to == ONIG_ENCODING_UTF32_LE) { if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { *conv = (UChar* )xmalloc(len * 4); - CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*conv); *conv_end = *conv + (len * 4); conv_ext0le32(s, end, *conv); return 0; @@ -178,17 +178,24 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, cpat_end = (UChar* )pattern_end; } - r = onig_alloc_init(reg, ci->option, ci->ambig_flag, ci->target_enc, - ci->syntax); + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) { + r = ONIGERR_MEMORY; + goto err2; + } + + r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc, + ci->syntax); if (r) goto err; r = onig_compile(*reg, cpat, cpat_end, einfo); if (r) { + err: onig_free(*reg); *reg = NULL; } - err: + err2: if (cpat != pattern) xfree(cpat); return r; diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c index 248957c9d9..4bd18c45e1 100644 --- a/ext/mbstring/oniguruma/reggnu.c +++ b/ext/mbstring/oniguruma/reggnu.c @@ -2,7 +2,7 @@ reggnu.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -85,7 +85,7 @@ re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) OnigErrorInfo einfo; r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); - if (r != 0) { + if (r != ONIG_NORMAL) { if (IS_NOT_NULL(ebuf)) (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); } @@ -108,7 +108,7 @@ re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), reg->options, enc, OnigDefaultSyntax, &einfo); - if (r != 0) { + if (r != ONIG_NORMAL) { if (IS_NOT_NULL(ebuf)) (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); } @@ -125,10 +125,13 @@ re_free_pattern(regex_t* reg) extern int re_alloc_pattern(regex_t** reg) { - return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, - ONIGENC_AMBIGUOUS_MATCH_DEFAULT, - OnigEncDefaultCharEncoding, - OnigDefaultSyntax); + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) return ONIGERR_MEMORY; + + return onig_reg_init(*reg, ONIG_OPTION_DEFAULT, + ONIGENC_CASE_FOLD_DEFAULT, + OnigEncDefaultCharEncoding, + OnigDefaultSyntax); } extern void @@ -138,18 +141,8 @@ re_set_casetable(const char* table) } extern void -#ifdef ONIG_RUBY_M17N -re_mbcinit(OnigEncoding enc) -#else re_mbcinit(int mb_code) -#endif { -#ifdef ONIG_RUBY_M17N - - onigenc_set_default_encoding(enc); - -#else - OnigEncoding enc; switch (mb_code) { @@ -171,5 +164,4 @@ re_mbcinit(int mb_code) } onigenc_set_default_encoding(enc); -#endif } diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h index d6819d8f94..a0ce4912d8 100644 --- a/ext/mbstring/oniguruma/regint.h +++ b/ext/mbstring/oniguruma/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -56,64 +56,77 @@ /* config */ /* spec. config */ -/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */ #define USE_NAMED_GROUP #define USE_SUBEXP_CALL -#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ +#define USE_BACKREF_WITH_LEVEL /* \k, \k */ +#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR /* #define USE_RECOMPILE_API */ -/* treat \r\n as line terminator. - !!! NO SUPPORT !!! - use this configuration on your own responsibility */ -/* #define USE_CRNL_AS_LINE_TERMINATOR */ +/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ /* internal config */ -#define USE_RECYCLE_NODE +#define USE_PARSE_TREE_NODE_RECYCLE #define USE_OP_PUSH_OR_JUMP_EXACT -#define USE_QUANTIFIER_PEEK_NEXT -#define USE_ST_HASH_TABLE +#define USE_QTFR_PEEK_NEXT +#define USE_ST_LIBRARY #define USE_SHARED_CCLASS_TABLE #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ -/* interface to external system */ -#ifdef NOT_RUBY /* given from Makefile */ +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif + +/* */ +/* escape other system UChar definition */ #include "config.h" -#define USE_BACKREF_AT_LEVEL +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif + +#define USE_WORD_BEGIN_END /* "\<", "\>" */ #define USE_CAPTURE_HISTORY #define USE_VARIABLE_META_CHARS -#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ -#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ +#define USE_POSIX_API_REGION_OPTION #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE /* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ + /* #define USE_MULTI_THREAD_SYSTEM */ -#define THREAD_SYSTEM_INIT /* depend on thread system */ -#define THREAD_SYSTEM_END /* depend on thread system */ -#define THREAD_ATOMIC_START /* depend on thread system */ -#define THREAD_ATOMIC_END /* depend on thread system */ -#define THREAD_PASS /* depend on thread system */ +#define THREAD_SYSTEM_INIT /* depend on thread system */ +#define THREAD_SYSTEM_END /* depend on thread system */ +#define THREAD_ATOMIC_START /* depend on thread system */ +#define THREAD_ATOMIC_END /* depend on thread system */ +#define THREAD_PASS /* depend on thread system */ #define xmalloc malloc #define xrealloc realloc #define xcalloc calloc #define xfree free -#else -#include "ruby.h" -#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */ - -#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */ -#define USE_MULTI_THREAD_SYSTEM -#define THREAD_SYSTEM_INIT -#define THREAD_SYSTEM_END -#define THREAD_ATOMIC_START DEFER_INTS -#define THREAD_ATOMIC_END ENABLE_INTS -#define THREAD_PASS rb_thread_schedule() -#define DEFAULT_WARN_FUNCTION onig_rb_warn -#define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning +#define CHECK_INTERRUPT_IN_MATCH_AT -#endif /* else NOT_RUBY */ +#define st_init_table onig_st_init_table +#define st_init_table_with_size onig_st_init_table_with_size +#define st_init_numtable onig_st_init_numtable +#define st_init_numtable_with_size onig_st_init_numtable_with_size +#define st_init_strtable onig_st_init_strtable +#define st_init_strtable_with_size onig_st_init_strtable_with_size +#define st_delete onig_st_delete +#define st_delete_safe onig_st_delete_safe +#define st_insert onig_st_insert +#define st_lookup onig_st_lookup +#define st_foreach onig_st_foreach +#define st_add_direct onig_st_add_direct +#define st_free_table onig_st_free_table +#define st_cleanup_safe onig_st_cleanup_safe +#define st_copy onig_st_copy +#define st_nothing_key_clone onig_st_nothing_key_clone +#define st_nothing_key_free onig_st_nothing_key_free +/* */ +#define onig_st_is_member st_is_member #define STATE_CHECK_STRING_THRESHOLD_LEN 7 #define STATE_CHECK_BUFF_MAX_SIZE 0x4000 @@ -122,17 +135,16 @@ #define xmemset memset #define xmemcpy memcpy #define xmemmove memmove + #if defined(_WIN32) && !defined(__GNUC__) #define xalloca _alloca -#if _MSC_VER < 1500 -#ifndef vsnprintf -#define vsnprintf _vsnprintf -#endif -#endif +#define xvsnprintf _vsnprintf #else #define xalloca alloca +#define xvsnprintf vsnprintf #endif + #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) #define ONIG_STATE_INC(reg) (reg)->state++ #define ONIG_STATE_DEC(reg) (reg)->state-- @@ -154,60 +166,6 @@ #define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - -#define onig_st_is_member st_is_member - -#ifdef NOT_RUBY - -#define st_init_table onig_st_init_table -#define st_init_table_with_size onig_st_init_table_with_size -#define st_init_numtable onig_st_init_numtable -#define st_init_numtable_with_size onig_st_init_numtable_with_size -#define st_init_strtable onig_st_init_strtable -#define st_init_strtable_with_size onig_st_init_strtable_with_size -#define st_init_strend_table_with_size onig_st_init_strend_table_with_size -#define st_delete onig_st_delete -#define st_delete_safe onig_st_delete_safe -#define st_insert onig_st_insert -#define st_insert_strend onig_st_insert_strend -#define st_lookup onig_st_lookup -#define st_lookup_strend onig_st_lookup_strend -#define st_foreach onig_st_foreach -#define st_add_direct onig_st_add_direct -#define st_add_direct_strend onig_st_add_direct_strend -#define st_free_table onig_st_free_table -#define st_cleanup_safe onig_st_cleanup_safe -#define st_copy onig_st_copy -#define st_nothing_key_clone onig_st_nothing_key_clone -#define st_nothing_key_free onig_st_nothing_key_free - -#else /* NOT_RUBY */ - -#define onig_st_init_table st_init_table -#define onig_st_init_table_with_size st_init_table_with_size -#define onig_st_init_numtable st_init_numtable -#define onig_st_init_numtable_with_size st_init_numtable_with_size -#define onig_st_init_strtable st_init_strtable -#define onig_st_init_strtable_with_size st_init_strtable_with_size -#define onig_st_init_strend_table_with_size st_init_strend_table_with_size -#define onig_st_delete st_delete -#define onig_st_delete_safe st_delete_safe -#define onig_st_insert st_insert -#define onig_st_insert_strend st_insert_strend -#define onig_st_lookup st_lookup -#define onig_st_lookup_strend st_lookup_strend -#define onig_st_foreach st_foreach -#define onig_st_add_direct st_add_direct -#define onig_st_add_direct_strend st_add_direct_strend -#define onig_st_free_table st_free_table -#define onig_st_cleanup_safe st_cleanup_safe -#define onig_st_copy st_copy -#define onig_st_nothing_key_clone st_nothing_key_clone -#define onig_st_nothing_key_free st_nothing_key_free - -#endif /* NOT_RUBY */ - - #ifdef HAVE_STDLIB_H #include #endif @@ -238,7 +196,6 @@ #endif #include "regenc.h" -#include "oniguruma.h" #ifdef MIN #undef MIN @@ -252,10 +209,23 @@ #define IS_NULL(p) (((void*)(p)) == (void*)0) #define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) #define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL -#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val) +#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY #define NULL_UCHARP ((UChar* )0) -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + +#define PLATFORM_GET_INC(val,p,type) do{\ + val = *(type* )p;\ + (p) += sizeof(type);\ +} while(0) + +#else + +#define PLATFORM_GET_INC(val,p,type) do{\ + xmemcpy(&val, (p), sizeof(type));\ + (p) += sizeof(type);\ +} while(0) + /* sizeof(OnigCodePoint) */ #define WORD_ALIGNMENT_SIZE SIZEOF_LONG @@ -270,86 +240,6 @@ (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ } while (0) - -#define B_SHIFT 8 -#define B_MASK 0xff - -#define SERIALIZE_2BYTE_INT(i,p) do {\ - *(p) = ((i) >> B_SHIFT) & B_MASK;\ - *((p)+1) = (i) & B_MASK;\ -} while (0) - -#define SERIALIZE_4BYTE_INT(i,p) do {\ - *(p) = ((i) >> B_SHIFT*3) & B_MASK;\ - *((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\ - *((p)+2) = ((i) >> B_SHIFT ) & B_MASK;\ - *((p)+3) = (i) & B_MASK;\ -} while (0) - -#define SERIALIZE_8BYTE_INT(i,p) do {\ - *(p) = ((i) >> B_SHIFT*7) & B_MASK;\ - *((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\ - *((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\ - *((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\ - *((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\ - *((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\ - *((p)+6) = ((i) >> B_SHIFT ) & B_MASK;\ - *((p)+7) = (i) & B_MASK;\ -} while (0) - -#define GET_2BYTE_INT_INC(type,i,p) do {\ - (i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\ - (p) += 2;\ -} while (0) - -#define GET_4BYTE_INT_INC(type,i,p) do {\ - (i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \ - ((unsigned int )((p)[1]) << B_SHIFT*2) | \ - ((unsigned int )((p)[2]) << B_SHIFT ) | \ - ((unsigned int )((p)[3]) )); \ - (p) += 4;\ -} while (0) - -#define GET_8BYTE_INT_INC(type,i,p) do {\ - (i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \ - ((unsigned long )((p)[1]) << B_SHIFT*6) | \ - ((unsigned long )((p)[2]) << B_SHIFT*5) | \ - ((unsigned long )((p)[3]) << B_SHIFT*4) | \ - ((unsigned long )((p)[4]) << B_SHIFT*3) | \ - ((unsigned long )((p)[5]) << B_SHIFT*2) | \ - ((unsigned long )((p)[6]) << B_SHIFT ) | \ - ((unsigned long )((p)[7]) )); \ - (p) += 8;\ -} while (0) - -#if SIZEOF_SHORT == 2 -#define GET_SHORT_INC(i,p) GET_2BYTE_INT_INC(short,i,p) -#define SERIALIZE_SHORT(i,p) SERIALIZE_2BYTE_INT(i,p) -#elif SIZEOF_SHORT == 4 -#define GET_SHORT_INC(i,p) GET_4BYTE_INT_INC(short,i,p) -#define SERIALIZE_SHORT(i,p) SERIALIZE_4BYTE_INT(i,p) -#elif SIZEOF_SHORT == 8 -#define GET_SHORT_INC(i,p) GET_8BYTE_INT_INC(short,i,p) -#define SERIALIZE_SHORT(i,p) SERIALIZE_8BYTE_INT(i,p) -#endif - -#if SIZEOF_INT == 2 -#define GET_INT_INC(i,p) GET_2BYTE_INT_INC(int,i,p) -#define GET_UINT_INC(i,p) GET_2BYTE_INT_INC(unsigned,i,p) -#define SERIALIZE_INT(i,p) SERIALIZE_2BYTE_INT(i,p) -#define SERIALIZE_UINT(i,p) SERIALIZE_2BYTE_INT(i,p) -#elif SIZEOF_INT == 4 -#define GET_INT_INC(i,p) GET_4BYTE_INT_INC(int,i,p) -#define GET_UINT_INC(i,p) GET_4BYTE_INT_INC(unsigned,i,p) -#define SERIALIZE_INT(i,p) SERIALIZE_4BYTE_INT(i,p) -#define SERIALIZE_UINT(i,p) SERIALIZE_4BYTE_INT(i,p) -#elif SIZEOF_INT == 8 -#define GET_INT_INC(i,p) GET_8BYTE_INT_INC(int,i,p) -#define GET_UINT_INC(i,p) GET_8BYTE_INT_INC(unsigned,i,p) -#define SERIALIZE_INT(i,p) SERIALIZE_8BYTE_INT(i,p) -#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p) -#endif - #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ /* stack pop level */ @@ -372,17 +262,17 @@ typedef unsigned int BitStatusType; #define BIT_STATUS_CLEAR(stats) (stats) = 0 #define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) #define BIT_STATUS_AT(stats,n) \ - ((n) < BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1)) + ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1)) #define BIT_STATUS_ON_AT(stats,n) do {\ - if ((n) < BIT_STATUS_BITS_NUM)\ + if ((n) < (int )BIT_STATUS_BITS_NUM) \ (stats) |= (1 << (n));\ else\ (stats) |= 1;\ } while (0) #define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ - if ((n) < BIT_STATUS_BITS_NUM)\ + if ((n) < (int )BIT_STATUS_BITS_NUM)\ (stats) |= (1 << (n));\ } while (0) @@ -401,7 +291,6 @@ typedef unsigned int BitStatusType; #define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) #define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) #define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) -#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option)) #define IS_FIND_CONDITION(option) ((option) & \ (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) #define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) @@ -415,6 +304,9 @@ typedef unsigned int BitStatusType; /* ignore-case and multibyte status are included in compiled code. */ #define IS_DYNAMIC_OPTION(option) 0 +#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ + ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) + #define REPEAT_INFINITE -1 #define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE) @@ -436,7 +328,7 @@ typedef Bits* BitSetRef; #define BITSET_CLEAR(bs) do {\ int i;\ - for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }\ + for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ } while (0) #define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM] @@ -603,7 +495,7 @@ enum OpCode { OP_BACKREFN_IC, OP_BACKREF_MULTI, OP_BACKREF_MULTI_IC, - OP_BACKREF_AT_LEVEL, /* \k, \k */ + OP_BACKREF_WITH_LEVEL, /* \k, \k */ OP_MEMORY_START, OP_MEMORY_START_PUSH, /* push back-tracker to stack */ @@ -612,9 +504,6 @@ enum OpCode { OP_MEMORY_END, OP_MEMORY_END_REC, /* push marker to stack */ - OP_SET_OPTION_PUSH, /* set option and push recover option */ - OP_SET_OPTION, /* set option */ - OP_FAIL, /* pop stack and move */ OP_JUMP, OP_PUSH, @@ -649,7 +538,11 @@ enum OpCode { OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ OP_STATE_CHECK, /* check only */ OP_STATE_CHECK_ANYCHAR_STAR, - OP_STATE_CHECK_ANYCHAR_ML_STAR + OP_STATE_CHECK_ANYCHAR_ML_STAR, + + /* no need: IS_DYNAMIC_OPTION() == 0 */ + OP_SET_OPTION_PUSH, /* set option and push recover option */ + OP_SET_OPTION /* set option */ }; typedef int RelAddrType; @@ -672,22 +565,6 @@ typedef void* PointerType; #define SIZE_POINTER sizeof(PointerType) -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - -#define PLATFORM_GET_INC(val,p,type) do{\ - val = *(type* )p;\ - (p) += sizeof(type);\ -} while(0) - -#else - -#define PLATFORM_GET_INC(val,p,type) do{\ - xmemcpy(&val, (p), sizeof(type));\ - (p) += sizeof(type);\ -} while(0) - -#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ - #define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) #define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) #define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) @@ -745,15 +622,15 @@ typedef void* PointerType; #define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) #endif -#define MC_ESC(enc) (enc)->meta_char_table.esc -#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar -#define MC_ANYTIME(enc) (enc)->meta_char_table.anytime -#define MC_ZERO_OR_ONE_TIME(enc) (enc)->meta_char_table.zero_or_one_time -#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time -#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime +#define MC_ESC(syn) (syn)->meta_char_table.esc +#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar +#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime +#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time +#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time +#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime -#define IS_MC_ESC_CODE(code, enc, syn) \ - ((code) == MC_ESC(enc) && \ +#define IS_MC_ESC_CODE(code, syn) \ + ((code) == MC_ESC(syn) && \ !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) @@ -782,22 +659,96 @@ typedef void* PointerType; ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + +#define NCCLASS_FLAGS(cc) ((cc)->flags) +#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) +#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag)) +#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) + /* cclass node */ -#define FLAG_CCLASS_NOT 1 -#define FLAG_CCLASS_SHARE (1<<1) +#define FLAG_NCCLASS_NOT (1<<0) +#define FLAG_NCCLASS_SHARE (1<<1) -#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT -#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT -#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE -#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0) -#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0) +#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) +#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) +#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) typedef struct { - int flags; + int type; + /* struct _Node* next; */ + /* unsigned int flags; */ +} NodeBase; + +typedef struct { + NodeBase base; + unsigned int flags; BitSet bs; - BBuf* mbuf; /* multi-byte info or NULL */ + BBuf* mbuf; /* multi-byte info or NULL */ } CClassNode; +typedef long OnigStackIndex; + +typedef struct _OnigStackType { + unsigned int type; + union { + struct { + UChar *pcode; /* byte code position */ + UChar *pstr; /* string position */ + UChar *pstr_prev; /* previous char position of pstr */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + unsigned int state_check; +#endif + } state; + struct { + int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ + UChar *pcode; /* byte code position (head of repeated target) */ + int num; /* repeat id */ + } repeat; + struct { + OnigStackIndex si; /* index of stack */ + } repeat_inc; + struct { + int num; /* memory num */ + UChar *pstr; /* start/end position */ + /* Following information is setted, if this stack type is MEM-START */ + OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ + OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */ + } mem; + struct { + int num; /* null check id */ + UChar *pstr; /* start position */ + } null_check; +#ifdef USE_SUBEXP_CALL + struct { + UChar *ret_addr; /* byte code position */ + int num; /* null check id */ + UChar *pstr; /* string position */ + } call_frame; +#endif + } u; +} OnigStackType; + +typedef struct { + void* stack_p; + int stack_n; + OnigOptionType options; + OnigRegion* region; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +#ifdef USE_COMBINATION_EXPLOSION_CHECK + void* state_check_buff; + int state_check_buff_size; +#endif +} OnigMatchArg; + + +#define IS_CODE_SB_WORD(enc,code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) #ifdef ONIG_DEBUG @@ -820,11 +771,38 @@ extern void onig_print_statistics P_((FILE* f)); extern UChar* onig_error_code_to_format P_((int code)); extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); extern int onig_bbuf_init P_((BBuf* buf, int size)); -extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax)); extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); extern void onig_chain_reduce P_((regex_t* reg)); extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); extern void onig_transfer P_((regex_t* to, regex_t* from)); extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc)); + +/* strend hash */ +typedef void hash_table_type; +typedef unsigned long hash_data_type; + +extern hash_table_type* onig_st_init_strend_table_with_size P_((int size)); +extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); +extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value)); + +/* encoding property management */ +#define PROPERTY_LIST_ADD_PROP(Name, CR) \ + r = onigenc_property_list_add_property((UChar* )Name, CR,\ + &PropertyNameTable, &PropertyList, &PropertyListNum,\ + &PropertyListSize);\ + if (r != 0) goto end + +#define PROPERTY_LIST_INIT_CHECK \ + if (PropertyInited == 0) {\ + int r = onigenc_property_list_init(init_property_list);\ + if (r != 0) return r;\ + } + +extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize)); + +typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); + +extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)); #endif /* REGINT_H */ diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c index af5c8593e6..01131300c6 100644 --- a/ext/mbstring/oniguruma/regparse.c +++ b/ext/mbstring/oniguruma/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,9 +28,13 @@ */ #include "regparse.h" +#include "st.h" #define WARN_BUFSIZE 256 +#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + + OnigSyntaxType OnigSyntaxRuby = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | @@ -41,6 +45,8 @@ OnigSyntaxType OnigSyntaxRuby = { ONIG_SYN_OP2_OPTION_RUBY | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | @@ -54,25 +60,20 @@ OnigSyntaxType OnigSyntaxRuby = { ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; -extern void onig_null_warn(const char* s) { } - -#ifdef RUBY_PLATFORM -extern void -onig_rb_warn(const char* s) -{ - rb_warn("%s", s); -} - -extern void -onig_rb_warning(const char* s) -{ - rb_warning("%s", s); -} -#endif +extern void onig_null_warn(const char* s ARG_UNUSED) { } #ifdef DEFAULT_WARN_FUNCTION static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; @@ -112,7 +113,7 @@ bbuf_clone(BBuf** rto, BBuf* from) BBuf *to; *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(to); r = BBUF_INIT(to, from->alloc); if (r != 0) return r; to->used = from->used; @@ -120,6 +121,9 @@ bbuf_clone(BBuf** rto, BBuf* from) return 0; } +#define BACKREF_REL_TO_ABS(rel_no, env) \ + ((env)->num_mem + 1 + (rel_no)) + #define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) #define MBCODE_START_POS(enc) \ @@ -139,7 +143,7 @@ bbuf_clone(BBuf** rto, BBuf* from) #define BITSET_IS_EMPTY(bs,empty) do {\ int i;\ empty = 1;\ - for (i = 0; i < BITSET_SIZE; i++) {\ + for (i = 0; i < (int )BITSET_SIZE; i++) {\ if ((bs)[i] != 0) {\ empty = 0; break;\ }\ @@ -160,9 +164,7 @@ static void bitset_set_all(BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { - bs[i] = ~((Bits )0); - } + for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); } } #endif @@ -170,45 +172,35 @@ static void bitset_invert(BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { - bs[i] = ~(bs[i]); - } + for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } } static void bitset_invert_to(BitSetRef from, BitSetRef to) { int i; - for (i = 0; i < BITSET_SIZE; i++) { - to[i] = ~(from[i]); - } + for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } } static void bitset_and(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { - dest[i] &= bs[i]; - } + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } } static void bitset_or(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { - dest[i] |= bs[i]; - } + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } } static void bitset_copy(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { - dest[i] = bs[i]; - } + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } } extern int @@ -223,8 +215,8 @@ onig_strncmp(const UChar* s1, const UChar* s2, int n) return 0; } -static void -k_strcpy(UChar* dest, const UChar* src, const UChar* end) +extern void +onig_strcpy(UChar* dest, const UChar* src, const UChar* end) { int len = end - src; if (len > 0) { @@ -233,6 +225,7 @@ k_strcpy(UChar* dest, const UChar* src, const UChar* end) } } +#ifdef USE_NAMED_GROUP static UChar* strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) { @@ -251,7 +244,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) return r; } - +#endif /* scan pattern methods */ #define PEND_VALUE 0 @@ -273,7 +266,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) #define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) static UChar* -k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, +strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, int capa) { UChar* r; @@ -284,7 +277,7 @@ k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_e r = (UChar* )xmalloc(capa + 1); CHECK_NULL_RETURN(r); - k_strcpy(r + (dest_end - dest), src, src_end); + onig_strcpy(r + (dest_end - dest), src, src_end); return r; } @@ -297,78 +290,23 @@ strcat_capa_from_static(UChar* dest, UChar* dest_end, r = (UChar* )xmalloc(capa + 1); CHECK_NULL_RETURN(r); - k_strcpy(r, dest, dest_end); - k_strcpy(r + (dest_end - dest), src, src_end); + onig_strcpy(r, dest, dest_end); + onig_strcpy(r + (dest_end - dest), src, src_end); return r; } -#ifdef USE_NAMED_GROUP - -#define INIT_NAME_BACKREFS_ALLOC_NUM 8 - -typedef struct { - UChar* name; - int name_len; /* byte length */ - int back_num; /* number of backrefs */ - int back_alloc; - int back_ref1; - int* back_refs; -} NameEntry; - -#ifdef USE_ST_HASH_TABLE -#include "st.h" +#ifdef USE_ST_LIBRARY typedef struct { - unsigned char* s; - unsigned char* end; -} st_strend_key; - -static int strend_cmp(st_strend_key*, st_strend_key*); -static int strend_hash(st_strend_key*); - -static struct st_hash_type type_strend_hash = { - strend_cmp, - strend_hash, -}; - -static st_table* -onig_st_init_strend_table_with_size(int size) -{ - return onig_st_init_table_with_size(&type_strend_hash, size); -} - -static int -onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value) -{ - st_strend_key key; - - key.s = (unsigned char* )str_key; - key.end = (unsigned char* )end_key; - - return onig_st_lookup(table, (st_data_t )(&key), value); -} - -static int -onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value) -{ - st_strend_key* key; - int result; - - key = (st_strend_key* )xmalloc(sizeof(st_strend_key)); - key->s = (unsigned char* )str_key; - key->end = (unsigned char* )end_key; - result = onig_st_insert(table, (st_data_t )key, value); - if (result) { - xfree(key); - } - return result; -} + UChar* s; + UChar* end; +} st_str_end_key; static int -strend_cmp(st_strend_key* x, st_strend_key* y) +str_end_cmp(st_str_end_key* x, st_str_end_key* y) { - unsigned char *p, *q; + UChar *p, *q; int c; if ((x->end - x->s) != (y->end - y->s)) @@ -387,12 +325,11 @@ strend_cmp(st_strend_key* x, st_strend_key* y) } static int -strend_hash(st_strend_key* x) +str_end_hash(st_str_end_key* x) { - int val; - unsigned char *p; + UChar *p; + int val = 0; - val = 0; p = x->s; while (p < x->end) { val = val * 997 + (int )*p++; @@ -401,6 +338,65 @@ strend_hash(st_strend_key* x) return val + (val >> 5); } +extern hash_table_type* +onig_st_init_strend_table_with_size(int size) +{ + static struct st_hash_type hashType = { + str_end_cmp, + str_end_hash, + }; + + return (hash_table_type* ) + onig_st_init_table_with_size(&hashType, size); +} + +extern int +onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, + const UChar* end_key, hash_data_type *value) +{ + st_str_end_key key; + + key.s = (UChar* )str_key; + key.end = (UChar* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +extern int +onig_st_insert_strend(hash_table_type* table, const UChar* str_key, + const UChar* end_key, hash_data_type value) +{ + st_str_end_key* key; + int result; + + key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); + key->s = (UChar* )str_key; + key->end = (UChar* )end_key; + result = onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + +#endif /* USE_ST_LIBRARY */ + + +#ifdef USE_NAMED_GROUP + +#define INIT_NAME_BACKREFS_ALLOC_NUM 8 + +typedef struct { + UChar* name; + int name_len; /* byte length */ + int back_num; /* number of backrefs */ + int back_alloc; + int back_ref1; + int* back_refs; +} NameEntry; + +#ifdef USE_ST_LIBRARY + typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ @@ -441,10 +437,10 @@ onig_print_names(FILE* fp, regex_t* reg) } return 0; } -#endif +#endif /* ONIG_DEBUG */ static int -i_free_name_entry(UChar* key, NameEntry* e, void* arg) +i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED) { xfree(e->name); if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); @@ -501,10 +497,9 @@ typedef struct { } INamesArg; static int -i_names(UChar* key, NameEntry* e, INamesArg* arg) +i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg) { int r = (*(arg->func))(e->name, - /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */ e->name + e->name_len, e->back_num, (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), @@ -518,8 +513,7 @@ i_names(UChar* key, NameEntry* e, INamesArg* arg) extern int onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) { INamesArg narg; NameTable* t = (NameTable* )reg->name_table; @@ -536,7 +530,7 @@ onig_foreach_name(regex_t* reg, } static int -i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map) +i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) { int i; @@ -575,7 +569,7 @@ onig_number_of_names(regex_t* reg) return 0; } -#else /* USE_ST_HASH_TABLE */ +#else /* USE_ST_LIBRARY */ #define INIT_NAMES_ALLOC_NUM 8 @@ -585,7 +579,6 @@ typedef struct { int alloc; } NameTable; - #ifdef ONIG_DEBUG extern int onig_print_names(FILE* fp, regex_t* reg) @@ -683,8 +676,7 @@ name_find(regex_t* reg, UChar* name, UChar* name_end) extern int onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) { int i, r; NameEntry* e; @@ -713,7 +705,7 @@ onig_number_of_names(regex_t* reg) return 0; } -#endif /* else USE_ST_HASH_TABLE */ +#endif /* else USE_ST_LIBRARY */ static int name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) @@ -727,16 +719,18 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) e = name_find(reg, name, name_end); if (IS_NULL(e)) { -#ifdef USE_ST_HASH_TABLE +#ifdef USE_ST_LIBRARY if (IS_NULL(t)) { t = onig_st_init_strend_table_with_size(5); reg->name_table = (void* )t; } e = (NameEntry* )xmalloc(sizeof(NameEntry)); - CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(e); e->name = strdup_with_null(reg->enc, name, name_end); - if (IS_NULL(e->name)) return ONIGERR_MEMORY; + if (IS_NULL(e->name)) { + xfree(e); return ONIGERR_MEMORY; + } onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), (HashDataType )e); @@ -750,7 +744,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (IS_NULL(t)) { alloc = INIT_NAMES_ALLOC_NUM; t = (NameTable* )xmalloc(sizeof(NameTable)); - CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(t); t->e = NULL; t->alloc = 0; t->num = 0; @@ -769,7 +763,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) alloc = t->alloc * 2; t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); - CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(t->e); t->alloc = alloc; clear: @@ -784,6 +778,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) e = &(t->e[t->num]); t->num++; e->name = strdup_with_null(reg->enc, name, name_end); + if (IS_NULL(e->name)) return ONIGERR_MEMORY; e->name_len = name_end - name; #endif } @@ -803,7 +798,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (e->back_num == 2) { alloc = INIT_NAME_BACKREFS_ALLOC_NUM; e->back_refs = (int* )xmalloc(sizeof(int) * alloc); - CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(e->back_refs); e->back_alloc = alloc; e->back_refs[0] = e->back_ref1; e->back_refs[1] = backref; @@ -812,7 +807,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (e->back_num > e->back_alloc) { alloc = e->back_alloc * 2; e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); - CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(e->back_refs); e->back_alloc = alloc; } e->back_refs[e->back_num - 1] = backref; @@ -826,9 +821,8 @@ extern int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end, int** nums) { - NameEntry* e; + NameEntry* e = name_find(reg, name, name_end); - e = name_find(reg, name, name_end); if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE; switch (e->back_num) { @@ -886,8 +880,7 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name, extern int onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) { return ONIG_NO_SUPPORT_CONFIG; } @@ -928,12 +921,12 @@ scan_env_clear(ScanEnv* env) BIT_STATUS_CLEAR(env->bt_mem_start); BIT_STATUS_CLEAR(env->bt_mem_end); BIT_STATUS_CLEAR(env->backrefed_mem); - env->error = (UChar* )NULL; - env->error_end = (UChar* )NULL; - env->num_call = 0; - env->num_mem = 0; + env->error = (UChar* )NULL; + env->error_end = (UChar* )NULL; + env->num_call = 0; + env->num_mem = 0; #ifdef USE_NAMED_GROUP - env->num_named = 0; + env->num_named = 0; #endif env->mem_alloc = 0; env->mem_nodes_dynamic = (Node** )NULL; @@ -968,7 +961,7 @@ scan_env_add_mem_entry(ScanEnv* env) alloc = env->mem_alloc * 2; p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); } - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); for (i = env->num_mem + 1; i < alloc; i++) p[i] = NULL_NODE; @@ -993,7 +986,7 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node) } -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE typedef struct _FreeNode { struct _FreeNode* next; } FreeNode; @@ -1008,20 +1001,20 @@ onig_node_free(Node* node) if (IS_NULL(node)) return ; switch (NTYPE(node)) { - case N_STRING: - if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) { - xfree(NSTRING(node).s); + case NT_STR: + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + xfree(NSTR(node)->s); } break; - case N_LIST: - case N_ALT: - onig_node_free(NCONS(node).left); - /* onig_node_free(NCONS(node).right); */ + case NT_LIST: + case NT_ALT: + onig_node_free(NCAR(node)); { - Node* next_node = NCONS(node).right; + Node* next_node = NCDR(node); -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE { FreeNode* n = (FreeNode* )node; @@ -1033,46 +1026,43 @@ onig_node_free(Node* node) #else xfree(node); #endif - node = next_node; goto start; } break; - case N_CCLASS: + case NT_CCLASS: { - CClassNode* cc = &(NCCLASS(node)); - - if (IS_CCLASS_SHARE(cc)) - return ; + CClassNode* cc = NCCLASS(node); + if (IS_NCCLASS_SHARE(cc)) return ; if (cc->mbuf) bbuf_free(cc->mbuf); } break; - case N_QUANTIFIER: - if (NQUANTIFIER(node).target) - onig_node_free(NQUANTIFIER(node).target); + case NT_QTFR: + if (NQTFR(node)->target) + onig_node_free(NQTFR(node)->target); break; - case N_EFFECT: - if (NEFFECT(node).target) - onig_node_free(NEFFECT(node).target); + case NT_ENCLOSE: + if (NENCLOSE(node)->target) + onig_node_free(NENCLOSE(node)->target); break; - case N_BACKREF: - if (IS_NOT_NULL(NBACKREF(node).back_dynamic)) - xfree(NBACKREF(node).back_dynamic); + case NT_BREF: + if (IS_NOT_NULL(NBREF(node)->back_dynamic)) + xfree(NBREF(node)->back_dynamic); break; - case N_ANCHOR: - if (NANCHOR(node).target) - onig_node_free(NANCHOR(node).target); + case NT_ANCHOR: + if (NANCHOR(node)->target) + onig_node_free(NANCHOR(node)->target); break; } -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE { FreeNode* n = (FreeNode* )node; @@ -1086,7 +1076,7 @@ onig_node_free(Node* node) #endif } -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE extern int onig_free_node_list(void) { @@ -1108,7 +1098,7 @@ node_new(void) { Node* node; -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE THREAD_ATOMIC_START; if (IS_NOT_NULL(FreeNodeList)) { node = (Node* )FreeNodeList; @@ -1120,6 +1110,7 @@ node_new(void) #endif node = (Node* )xmalloc(sizeof(Node)); + /* xmemset(node, 0, sizeof(Node)); */ return node; } @@ -1128,6 +1119,7 @@ static void initialize_cclass(CClassNode* cc) { BITSET_CLEAR(cc->bs); + /* cc->base.flags = 0; */ cc->flags = 0; cc->mbuf = NULL; } @@ -1137,53 +1129,55 @@ node_new_cclass(void) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_CCLASS; - initialize_cclass(&(NCCLASS(node))); + SET_NTYPE(node, NT_CCLASS); + initialize_cclass(NCCLASS(node)); return node; } static Node* -node_new_cclass_by_codepoint_range(int not, - const OnigCodePoint sbr[], const OnigCodePoint mbr[]) +node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, + const OnigCodePoint ranges[]) { + int n, i; CClassNode* cc; - int n, i, j; + OnigCodePoint j; - Node* node = node_new(); + Node* node = node_new_cclass(); CHECK_NULL_RETURN(node); - node->type = N_CCLASS; - cc = &(NCCLASS(node)); - cc->flags = 0; - if (not != 0) CCLASS_SET_NOT(cc); + cc = NCCLASS(node); + if (not != 0) NCCLASS_SET_NOT(cc); BITSET_CLEAR(cc->bs); - if (IS_NOT_NULL(sbr)) { - n = ONIGENC_CODE_RANGE_NUM(sbr); + if (sb_out > 0 && IS_NOT_NULL(ranges)) { + n = ONIGENC_CODE_RANGE_NUM(ranges); for (i = 0; i < n; i++) { - for (j = ONIGENC_CODE_RANGE_FROM(sbr, i); - j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) { + for (j = ONIGENC_CODE_RANGE_FROM(ranges, i); + j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) { + if (j >= sb_out) goto sb_end; + BITSET_SET_BIT(cc->bs, j); } } } - if (IS_NULL(mbr)) { + sb_end: + if (IS_NULL(ranges)) { is_null: cc->mbuf = NULL; } else { BBuf* bbuf; - n = ONIGENC_CODE_RANGE_NUM(mbr); + n = ONIGENC_CODE_RANGE_NUM(ranges); if (n == 0) goto is_null; bbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_VAL(bbuf, NULL); + CHECK_NULL_RETURN(bbuf); bbuf->alloc = n + 1; bbuf->used = n + 1; - bbuf->p = (UChar* )((void* )mbr); + bbuf->p = (UChar* )((void* )ranges); cc->mbuf = bbuf; } @@ -1192,12 +1186,14 @@ node_new_cclass_by_codepoint_range(int not, } static Node* -node_new_ctype(int type) +node_new_ctype(int type, int not) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_CTYPE; - NCTYPE(node).type = type; + + SET_NTYPE(node, NT_CTYPE); + NCTYPE(node)->ctype = type; + NCTYPE(node)->not = not; return node; } @@ -1206,7 +1202,8 @@ node_new_anychar(void) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_ANYCHAR; + + SET_NTYPE(node, NT_CANY); return node; } @@ -1215,9 +1212,10 @@ node_new_list(Node* left, Node* right) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_LIST; - NCONS(node).left = left; - NCONS(node).right = right; + + SET_NTYPE(node, NT_LIST); + NCAR(node) = left; + NCDR(node) = right; return node; } @@ -1227,14 +1225,33 @@ onig_node_new_list(Node* left, Node* right) return node_new_list(left, right); } -static Node* -node_new_alt(Node* left, Node* right) +extern Node* +onig_node_list_add(Node* list, Node* x) +{ + Node *n; + + n = onig_node_new_list(x, NULL); + if (IS_NULL(n)) return NULL_NODE; + + if (IS_NOT_NULL(list)) { + while (IS_NOT_NULL(NCDR(list))) + list = NCDR(list); + + NCDR(list) = n; + } + + return n; +} + +extern Node* +onig_node_new_alt(Node* left, Node* right) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_ALT; - NCONS(node).left = left; - NCONS(node).right = right; + + SET_NTYPE(node, NT_ALT); + NCAR(node) = left; + NCDR(node) = right; return node; } @@ -1243,16 +1260,17 @@ onig_node_new_anchor(int type) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_ANCHOR; - NANCHOR(node).type = type; - NANCHOR(node).target = NULL; - NANCHOR(node).char_len = -1; + + SET_NTYPE(node, NT_ANCHOR); + NANCHOR(node)->type = type; + NANCHOR(node)->target = NULL; + NANCHOR(node)->char_len = -1; return node; } static Node* node_new_backref(int back_num, int* backrefs, int by_name, -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL int exist_level, int nest_level, #endif ScanEnv* env) @@ -1261,31 +1279,32 @@ node_new_backref(int back_num, int* backrefs, int by_name, Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_BACKREF; - NBACKREF(node).state = 0; - NBACKREF(node).back_num = back_num; - NBACKREF(node).back_dynamic = (int* )NULL; + + SET_NTYPE(node, NT_BREF); + NBREF(node)->state = 0; + NBREF(node)->back_num = back_num; + NBREF(node)->back_dynamic = (int* )NULL; if (by_name != 0) - NBACKREF(node).state |= NST_NAME_REF; + NBREF(node)->state |= NST_NAME_REF; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (exist_level != 0) { - NBACKREF(node).state |= NST_NEST_LEVEL; - NBACKREF(node).nest_level = nest_level; + NBREF(node)->state |= NST_NEST_LEVEL; + NBREF(node)->nest_level = nest_level; } #endif for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { - NBACKREF(node).state |= NST_RECURSION; /* /...(\1).../ */ + NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ break; } } if (back_num <= NODE_BACKREFS_SIZE) { for (i = 0; i < back_num; i++) - NBACKREF(node).back_static[i] = backrefs[i]; + NBREF(node)->back_static[i] = backrefs[i]; } else { int* p = (int* )xmalloc(sizeof(int) * back_num); @@ -1293,7 +1312,7 @@ node_new_backref(int back_num, int* backrefs, int by_name, onig_node_free(node); return NULL; } - NBACKREF(node).back_dynamic = p; + NBREF(node)->back_dynamic = p; for (i = 0; i < back_num; i++) p[i] = backrefs[i]; } @@ -1302,17 +1321,17 @@ node_new_backref(int back_num, int* backrefs, int by_name, #ifdef USE_SUBEXP_CALL static Node* -node_new_call(UChar* name, UChar* name_end) +node_new_call(UChar* name, UChar* name_end, int gnum) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_CALL; - NCALL(node).state = 0; - NCALL(node).ref_num = CALLNODE_REFNUM_UNDEF; - NCALL(node).target = NULL_NODE; - NCALL(node).name = name; - NCALL(node).name_end = name_end; + SET_NTYPE(node, NT_CALL); + NCALL(node)->state = 0; + NCALL(node)->target = NULL_NODE; + NCALL(node)->name = name; + NCALL(node)->name_end = name_end; + NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ return node; } #endif @@ -1322,58 +1341,60 @@ node_new_quantifier(int lower, int upper, int by_number) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_QUANTIFIER; - NQUANTIFIER(node).state = 0; - NQUANTIFIER(node).target = NULL; - NQUANTIFIER(node).lower = lower; - NQUANTIFIER(node).upper = upper; - NQUANTIFIER(node).greedy = 1; - NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY; - NQUANTIFIER(node).head_exact = NULL_NODE; - NQUANTIFIER(node).next_head_exact = NULL_NODE; - NQUANTIFIER(node).is_refered = 0; + + SET_NTYPE(node, NT_QTFR); + NQTFR(node)->state = 0; + NQTFR(node)->target = NULL; + NQTFR(node)->lower = lower; + NQTFR(node)->upper = upper; + NQTFR(node)->greedy = 1; + NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; + NQTFR(node)->head_exact = NULL_NODE; + NQTFR(node)->next_head_exact = NULL_NODE; + NQTFR(node)->is_refered = 0; if (by_number != 0) - NQUANTIFIER(node).state |= NST_BY_NUMBER; + NQTFR(node)->state |= NST_BY_NUMBER; #ifdef USE_COMBINATION_EXPLOSION_CHECK - NQUANTIFIER(node).comb_exp_check_num = 0; + NQTFR(node)->comb_exp_check_num = 0; #endif return node; } static Node* -node_new_effect(int type) +node_new_enclose(int type) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_EFFECT; - NEFFECT(node).type = type; - NEFFECT(node).state = 0; - NEFFECT(node).regnum = 0; - NEFFECT(node).option = 0; - NEFFECT(node).target = NULL; - NEFFECT(node).call_addr = -1; - NEFFECT(node).opt_count = 0; + + SET_NTYPE(node, NT_ENCLOSE); + NENCLOSE(node)->type = type; + NENCLOSE(node)->state = 0; + NENCLOSE(node)->regnum = 0; + NENCLOSE(node)->option = 0; + NENCLOSE(node)->target = NULL; + NENCLOSE(node)->call_addr = -1; + NENCLOSE(node)->opt_count = 0; return node; } extern Node* -onig_node_new_effect(int type) +onig_node_new_enclose(int type) { - return node_new_effect(type); + return node_new_enclose(type); } static Node* -node_new_effect_memory(OnigOptionType option, int is_named) +node_new_enclose_memory(OnigOptionType option, int is_named) { - Node* node = node_new_effect(EFFECT_MEMORY); + Node* node = node_new_enclose(ENCLOSE_MEMORY); CHECK_NULL_RETURN(node); if (is_named != 0) - SET_EFFECT_STATUS(node, NST_NAMED_GROUP); + SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); #ifdef USE_SUBEXP_CALL - NEFFECT(node).option = option; + NENCLOSE(node)->option = option; #endif return node; } @@ -1381,9 +1402,9 @@ node_new_effect_memory(OnigOptionType option, int is_named) static Node* node_new_option(OnigOptionType option) { - Node* node = node_new_effect(EFFECT_OPTION); + Node* node = node_new_enclose(ENCLOSE_OPTION); CHECK_NULL_RETURN(node); - NEFFECT(node).option = option; + NENCLOSE(node)->option = option; return node; } @@ -1393,36 +1414,43 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) int addlen = end - s; if (addlen > 0) { - int len = NSTRING(node).end - NSTRING(node).s; + int len = NSTR(node)->end - NSTR(node)->s; - if (NSTRING(node).capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { + if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { UChar* p; int capa = len + addlen + NODE_STR_MARGIN; - if (capa <= NSTRING(node).capa) { - k_strcpy(NSTRING(node).s + len, s, end); + if (capa <= NSTR(node)->capa) { + onig_strcpy(NSTR(node)->s + len, s, end); } else { - if (NSTRING(node).s == NSTRING(node).buf) - p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end, + if (NSTR(node)->s == NSTR(node)->buf) + p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, s, end, capa); else - p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa); + p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); - NSTRING(node).s = p; - NSTRING(node).capa = capa; + CHECK_NULL_RETURN_MEMERR(p); + NSTR(node)->s = p; + NSTR(node)->capa = capa; } } else { - k_strcpy(NSTRING(node).s + len, s, end); + onig_strcpy(NSTR(node)->s + len, s, end); } - NSTRING(node).end = NSTRING(node).s + len + addlen; + NSTR(node)->end = NSTR(node)->s + len + addlen; } return 0; } +extern int +onig_node_str_set(Node* node, const UChar* s, const UChar* end) +{ + onig_node_str_clear(node); + return onig_node_str_cat(node, s, end); +} + static int node_str_cat_char(Node* node, UChar c) { @@ -1435,26 +1463,25 @@ node_str_cat_char(Node* node, UChar c) extern void onig_node_conv_to_str_node(Node* node, int flag) { - node->type = N_STRING; - - NSTRING(node).flag = flag; - NSTRING(node).capa = 0; - NSTRING(node).s = NSTRING(node).buf; - NSTRING(node).end = NSTRING(node).buf; + SET_NTYPE(node, NT_STR); + NSTR(node)->flag = flag; + NSTR(node)->capa = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; } extern void onig_node_str_clear(Node* node) { - if (NSTRING(node).capa != 0 && - IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) { - xfree(NSTRING(node).s); + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + xfree(NSTR(node)->s); } - NSTRING(node).capa = 0; - NSTRING(node).flag = 0; - NSTRING(node).s = NSTRING(node).buf; - NSTRING(node).end = NSTRING(node).buf; + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; } static Node* @@ -1463,11 +1490,11 @@ node_new_str(const UChar* s, const UChar* end) Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_STRING; - NSTRING(node).capa = 0; - NSTRING(node).flag = 0; - NSTRING(node).s = NSTRING(node).buf; - NSTRING(node).end = NSTRING(node).buf; + SET_NTYPE(node, NT_STR); + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; if (onig_node_str_cat(node, s, end)) { onig_node_free(node); return NULL; @@ -1481,7 +1508,6 @@ onig_node_new_str(const UChar* s, const UChar* end) return node_new_str(s, end); } -#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG static Node* node_new_str_raw(UChar* s, UChar* end) { @@ -1489,7 +1515,6 @@ node_new_str_raw(UChar* s, UChar* end) NSTRING_SET_RAW(node); return node; } -#endif static Node* node_new_empty(void) @@ -1498,12 +1523,12 @@ node_new_empty(void) } static Node* -node_new_str_char(UChar c) +node_new_str_raw_char(UChar c) { UChar p[1]; p[0] = c; - return node_new_str(p, p + 1); + return node_new_str_raw(p, p + 1); } static Node* @@ -1528,7 +1553,7 @@ static int str_node_can_be_split(StrNode* sn, OnigEncoding enc) { if (sn->end > sn->s) { - return ((enc_len(enc, sn->s) < sn->end - sn->s) ? 1 : 0); + return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0); } return 0; } @@ -1651,7 +1676,7 @@ new_code_range(BBuf** pbuf) BBuf* bbuf; bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*pbuf); r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); if (r) return r; @@ -1952,10 +1977,10 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) BitSetRef bsr1, bsr2; BitSet bs1, bs2; - not1 = IS_CCLASS_NOT(dest); + not1 = IS_NCCLASS_NOT(dest); bsr1 = dest->bs; buf1 = dest->mbuf; - not2 = IS_CCLASS_NOT(cc); + not2 = IS_NCCLASS_NOT(cc); bsr2 = cc->bs; buf2 = cc->mbuf; @@ -2010,10 +2035,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) BitSetRef bsr1, bsr2; BitSet bs1, bs2; - not1 = IS_CCLASS_NOT(dest); + not1 = IS_NCCLASS_NOT(dest); bsr1 = dest->bs; buf1 = dest->mbuf; - not2 = IS_CCLASS_NOT(cc); + not2 = IS_NCCLASS_NOT(cc); bsr2 = cc->bs; buf2 = cc->mbuf; @@ -2066,13 +2091,13 @@ conv_backslash_value(int c, ScanEnv* env) { if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { switch (c) { - case 'n': return '\n'; - case 't': return '\t'; - case 'r': return '\r'; - case 'f': return '\f'; - case 'a': return '\007'; - case 'b': return '\010'; - case 'e': return '\033'; + case 'n': return '\n'; + case 't': return '\t'; + case 'r': return '\r'; + case 'f': return '\f'; + case 'a': return '\007'; + case 'b': return '\010'; + case 'e': return '\033'; case 'v': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) return '\v'; @@ -2089,26 +2114,26 @@ static int is_invalid_quantifier_target(Node* node) { switch (NTYPE(node)) { - case N_ANCHOR: + case NT_ANCHOR: return 1; break; - case N_EFFECT: - if (NEFFECT(node).type == EFFECT_OPTION) - return is_invalid_quantifier_target(NEFFECT(node).target); + case NT_ENCLOSE: + /* allow enclosed elements */ + /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */ break; - case N_LIST: /* ex. (?:\G\A)* */ + case NT_LIST: do { - if (! is_invalid_quantifier_target(NCONS(node).left)) return 0; - } while (IS_NOT_NULL(node = NCONS(node).right)); + if (! is_invalid_quantifier_target(NCAR(node))) return 0; + } while (IS_NOT_NULL(node = NCDR(node))); return 0; break; - case N_ALT: /* ex. (?:abc|\A)* */ + case NT_ALT: do { - if (is_invalid_quantifier_target(NCONS(node).left)) return 1; - } while (IS_NOT_NULL(node = NCONS(node).right)); + if (is_invalid_quantifier_target(NCAR(node))) return 1; + } while (IS_NOT_NULL(node = NCDR(node))); break; default: @@ -2119,24 +2144,24 @@ is_invalid_quantifier_target(Node* node) /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ static int -popular_quantifier_num(QuantifierNode* qf) +popular_quantifier_num(QtfrNode* q) { - if (qf->greedy) { - if (qf->lower == 0) { - if (qf->upper == 1) return 0; - else if (IS_REPEAT_INFINITE(qf->upper)) return 1; + if (q->greedy) { + if (q->lower == 0) { + if (q->upper == 1) return 0; + else if (IS_REPEAT_INFINITE(q->upper)) return 1; } - else if (qf->lower == 1) { - if (IS_REPEAT_INFINITE(qf->upper)) return 2; + else if (q->lower == 1) { + if (IS_REPEAT_INFINITE(q->upper)) return 2; } } else { - if (qf->lower == 0) { - if (qf->upper == 1) return 3; - else if (IS_REPEAT_INFINITE(qf->upper)) return 4; + if (q->lower == 0) { + if (q->upper == 1) return 3; + else if (IS_REPEAT_INFINITE(q->upper)) return 4; } - else if (qf->lower == 1) { - if (IS_REPEAT_INFINITE(qf->upper)) return 5; + else if (q->lower == 1) { + if (IS_REPEAT_INFINITE(q->upper)) return 5; } } return -1; @@ -2166,16 +2191,17 @@ extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode) { int pnum, cnum; - QuantifierNode *p, *c; + QtfrNode *p, *c; - p = &(NQUANTIFIER(pnode)); - c = &(NQUANTIFIER(cnode)); + p = NQTFR(pnode); + c = NQTFR(cnode); pnum = popular_quantifier_num(p); cnum = popular_quantifier_num(c); + if (pnum < 0 || cnum < 0) return ; switch(ReduceTypeTable[cnum][pnum]) { case RQ_DEL: - *p = *c; + *pnode = *cnode; break; case RQ_A: p->target = c->target; @@ -2262,7 +2288,7 @@ typedef struct { int ref1; int* refs; int by_name; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL int exist_level; int level; /* \k */ #endif @@ -2270,8 +2296,10 @@ typedef struct { struct { UChar* name; UChar* name_end; + int gnum; } call; struct { + int ctype; int not; } prop; } u; @@ -2346,7 +2374,7 @@ fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) if (PEND) goto invalid; PFETCH(c); if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { - if (c != MC_ESC(enc)) goto invalid; + if (c != MC_ESC(env->syntax)) goto invalid; PFETCH(c); } if (c != '}') goto invalid; @@ -2389,7 +2417,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) if (c != '-') return ONIGERR_META_CODE_SYNTAX; if (PEND) return ONIGERR_END_PATTERN_AT_META; PFETCH(c); - if (c == MC_ESC(enc)) { + if (c == MC_ESC(env->syntax)) { v = fetch_escaped_value(&p, end, env); if (v < 0) return v; c = (OnigCodePoint )v; @@ -2419,7 +2447,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) c = 0177; } else { - if (c == MC_ESC(enc)) { + if (c == MC_ESC(env->syntax)) { v = fetch_escaped_value(&p, end, env); if (v < 0) return v; c = (OnigCodePoint )v; @@ -2444,23 +2472,47 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); +static OnigCodePoint +get_name_end_code_point(OnigCodePoint start) +{ + switch (start) { + case '<': return (OnigCodePoint )'>'; break; + case '\'': return (OnigCodePoint )'\''; break; + default: + break; + } + + return (OnigCodePoint )0; +} + #ifdef USE_NAMED_GROUP -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL /* \k, \k + \k, \k + \k<-num+n>, \k<-num-n> */ static int -fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end - , ScanEnv* env, int* level) +fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, + int* rback_num, int* rlevel) { - int r, exist_level = 0; + int r, sign, is_num, exist_level; + OnigCodePoint end_code; OnigCodePoint c = 0; - OnigCodePoint first_code; OnigEncoding enc = env->enc; UChar *name_end; + UChar *pnum_head; UChar *p = *src; PFETCH_READY; + *rback_num = 0; + is_num = exist_level = 0; + sign = 1; + pnum_head = *src; + + end_code = get_name_end_code_point(start_code); + name_end = end; r = 0; if (PEND) { @@ -2468,11 +2520,18 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end } else { PFETCH(c); - first_code = c; - if (c == '>') + if (c == end_code) return ONIGERR_EMPTY_GROUP_NAME; - if (!ONIGENC_IS_CODE_WORD(enc, c)) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } } @@ -2480,43 +2539,58 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end while (!PEND) { name_end = p; PFETCH(c); - if (c == '>' || c == ')' || c == '+' || c == '-') break; + if (c == end_code || c == ')' || c == '+' || c == '-') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } - if (!ONIGENC_IS_CODE_WORD(enc, c)) { + if (is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } } - if (c != '>') { + if (r == 0 && c != end_code) { if (c == '+' || c == '-') { - int num; + int level; int flag = (c == '-' ? -1 : 1); PFETCH(c); if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; PUNFETCH; - num = onig_scan_unsigned_number(&p, end, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - *level = (num * flag); + level = onig_scan_unsigned_number(&p, end, enc); + if (level < 0) return ONIGERR_TOO_BIG_NUMBER; + *rlevel = (level * flag); exist_level = 1; PFETCH(c); - if (c == '>') - goto first_check; + if (c == end_code) + goto end; } err: r = ONIGERR_INVALID_GROUP_NAME; name_end = end; } - else { - first_check: - if (ONIGENC_IS_CODE_ASCII(first_code) && - ONIGENC_IS_CODE_UPPER(enc, first_code)) - r = ONIGERR_INVALID_GROUP_NAME; - } + end: if (r == 0) { + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) goto err; + + *rback_num *= sign; + } + *rname_end = name_end; *src = p; return (exist_level ? 1 : 0); @@ -2526,33 +2600,40 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end return r; } } -#endif /* USE_BACKREF_AT_LEVEL */ +#endif /* USE_BACKREF_WITH_LEVEL */ /* def: 0 -> define name (don't allow number name) 1 -> reference name (allow number name) */ static int -fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) +fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) { - int r, is_num; + int r, is_num, sign; + OnigCodePoint end_code; OnigCodePoint c = 0; - OnigCodePoint first_code; OnigEncoding enc = env->enc; UChar *name_end; + UChar *pnum_head; UChar *p = *src; PFETCH_READY; + *rback_num = 0; + + end_code = get_name_end_code_point(start_code); + name_end = end; + pnum_head = *src; r = 0; is_num = 0; + sign = 1; if (PEND) { return ONIGERR_EMPTY_GROUP_NAME; } else { PFETCH(c); - first_code = c; - if (c == '>') + if (c == end_code) return ONIGERR_EMPTY_GROUP_NAME; if (ONIGENC_IS_CODE_DIGIT(enc, c)) { @@ -2560,6 +2641,18 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) is_num = 1; else { r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (c == '-') { + if (ref == 1) { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; } } else if (!ONIGENC_IS_CODE_WORD(enc, c)) { @@ -2567,74 +2660,137 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) } } - while (!PEND) { - name_end = p; - PFETCH(c); - if (c == '>' || c == ')') break; + if (r == 0) { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } - if (is_num == 1) { - if (! ONIGENC_IS_CODE_DIGIT(enc, c)) { - if (!ONIGENC_IS_CODE_WORD(enc, c)) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - else - r = ONIGERR_INVALID_GROUP_NAME; + if (is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else + r = ONIGERR_INVALID_GROUP_NAME; + + is_num = 0; + } } - } - else { - if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } } } - } - if (c != '>') { - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - else { - if (ONIGENC_IS_CODE_ASCII(first_code) && - ONIGENC_IS_CODE_UPPER(enc, first_code)) + if (c != end_code) { r = ONIGERR_INVALID_GROUP_NAME; - } + name_end = end; + } + + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + + *rback_num *= sign; + } - if (r == 0) { *rname_end = name_end; *src = p; return 0; } else { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') + break; + } + if (PEND) + name_end = end; + + err: onig_scan_env_set_error_string(env, r, *src, name_end); return r; } } #else static int -fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) +fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) { - int r, len; + int r, is_num, sign; + OnigCodePoint end_code; OnigCodePoint c = 0; UChar *name_end; OnigEncoding enc = env->enc; + UChar *pnum_head; UChar *p = *src; PFETCH_READY; + *rback_num = 0; + + end_code = get_name_end_code_point(start_code); + + *rname_end = name_end = end; r = 0; + pnum_head = *src; + is_num = 0; + sign = 1; + + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + while (!PEND) { name_end = p; - if (enc_len(enc, p) > 1) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; PFETCH(c); - if (c == '>' || c == ')') break; + if (c == end_code || c == ')') break; if (! ONIGENC_IS_CODE_DIGIT(enc, c)) r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } - if (c != '>') { + if (r == 0 && c != end_code) { r = ONIGERR_INVALID_GROUP_NAME; name_end = end; } if (r == 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + *rback_num *= sign; + *rname_end = name_end; *src = p; return 0; @@ -2645,7 +2801,7 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) return r; } } -#endif +#endif /* USE_NAMED_GROUP */ static void CC_ESC_WARN(ScanEnv* env, UChar *c) @@ -2663,7 +2819,7 @@ CC_ESC_WARN(ScanEnv* env, UChar *c) } static void -CCEND_ESC_WARN(ScanEnv* env, UChar* c) +CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) { if (onig_warn == onig_null_warn) return ; @@ -2687,12 +2843,12 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, while (p < to) { x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enc_len(enc, p); + q = p + enclen(enc, p); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; - q += enc_len(enc, q); + q += enclen(enc, q); } if (i >= n) { if (IS_NOT_NULL(next)) @@ -2707,7 +2863,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, static int str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, - OnigCodePoint bad, OnigEncoding enc) + OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn) { int i, in_esc; OnigCodePoint x; @@ -2718,24 +2874,24 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, while (p < to) { if (in_esc) { in_esc = 0; - p += enc_len(enc, p); + p += enclen(enc, p); } else { x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enc_len(enc, p); + q = p + enclen(enc, p); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; - q += enc_len(enc, q); + q += enclen(enc, q); } if (i >= n) return 1; - p += enc_len(enc, p); + p += enclen(enc, p); } else { x = ONIGENC_MBC_TO_CODE(enc, p, to); if (x == bad) return 0; - else if (x == MC_ESC(enc)) in_esc = 1; + else if (x == MC_ESC(syn)) in_esc = 1; p = q; } } @@ -2771,7 +2927,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) else if (c == '-') { tok->type = TK_CC_RANGE; } - else if (c == MC_ESC(enc)) { + else if (c == MC_ESC(syn)) { if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) goto end; @@ -2783,37 +2939,45 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) switch (c) { case 'w': tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_WORD; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; + tok->u.prop.not = 0; break; case 'W': tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_NOT_WORD; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; + tok->u.prop.not = 1; break; case 'd': tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_DIGIT; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; + tok->u.prop.not = 0; break; case 'D': tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_NOT_DIGIT; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; + tok->u.prop.not = 1; break; case 's': tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_WHITE_SPACE; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; + tok->u.prop.not = 0; break; case 'S': tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_NOT_WHITE_SPACE; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; + tok->u.prop.not = 1; break; case 'h': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_XDIGIT; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 0; break; case 'H': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_NOT_XDIGIT; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 1; break; case 'p': @@ -2850,7 +3014,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) { + if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { PINC; tok->type = TK_CODE_POINT; tok->base = 16; @@ -2922,7 +3086,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->backp = p; /* point at '[' is readed */ PINC; if (str_exist_check_with_esc(send, 2, p, end, - (OnigCodePoint )']', enc)) { + (OnigCodePoint )']', enc, syn)) { tok->type = TK_POSIX_BRACKET_OPEN; } else { @@ -2975,7 +3139,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->backp = p; PFETCH(c); - if (IS_MC_ESC_CODE(c, enc, syn)) { + if (IS_MC_ESC_CODE(c, syn)) { if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; tok->backp = p; @@ -3062,13 +3226,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'w': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_WORD; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; + tok->u.prop.not = 0; break; case 'W': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_NOT_WORD; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; + tok->u.prop.not = 1; break; case 'b': @@ -3100,37 +3266,43 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 's': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_WHITE_SPACE; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; + tok->u.prop.not = 0; break; case 'S': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_NOT_WHITE_SPACE; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; + tok->u.prop.not = 1; break; case 'd': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_DIGIT; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; + tok->u.prop.not = 0; break; case 'D': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_NOT_DIGIT; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; + tok->u.prop.not = 1; break; case 'h': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_XDIGIT; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 0; break; case 'H': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; tok->type = TK_CHAR_TYPE; - tok->u.subtype = CTYPE_NOT_XDIGIT; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 1; break; case 'A': @@ -3182,7 +3354,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) { + if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { PINC; tok->type = TK_CODE_POINT; tok->u.code = (OnigCodePoint )num; @@ -3240,7 +3412,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.backref.num = 1; tok->u.backref.ref1 = num; tok->u.backref.by_name = 0; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL tok->u.backref.exist_level = 0; #endif break; @@ -3276,46 +3448,67 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'k': if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { PFETCH(c); - if (c == '<') { + if (c == '<' || c == '\'') { UChar* name_end; int* backs; + int back_num; prev = p; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL name_end = NULL_UCHARP; /* no need. escape gcc warning. */ - r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level); + r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, + env, &back_num, &tok->u.backref.level); if (r == 1) tok->u.backref.exist_level = 1; else tok->u.backref.exist_level = 0; #else - r = fetch_name(&p, end, &name_end, env, 1); + r = fetch_name(&p, end, &name_end, env, &back_num, 1); #endif if (r < 0) return r; - num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); - if (num <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - int i; - for (i = 0; i < num; i++) { - if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + if (back_num != 0) { + if (back_num < 0) { + back_num = BACKREF_REL_TO_ABS(back_num, env); + if (back_num <= 0) return ONIGERR_INVALID_BACKREF; } - } - tok->type = TK_BACKREF; - tok->u.backref.by_name = 1; - if (num == 1) { + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (back_num > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[back_num])) + return ONIGERR_INVALID_BACKREF; + } + tok->type = TK_BACKREF; + tok->u.backref.by_name = 0; tok->u.backref.num = 1; - tok->u.backref.ref1 = backs[0]; + tok->u.backref.ref1 = back_num; } else { - tok->u.backref.num = num; - tok->u.backref.refs = backs; + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); + if (num <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + int i; + for (i = 0; i < num; i++) { + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + return ONIGERR_INVALID_BACKREF; + } + } + + tok->type = TK_BACKREF; + tok->u.backref.by_name = 1; + if (num == 1) { + tok->u.backref.num = 1; + tok->u.backref.ref1 = backs[0]; + } + else { + tok->u.backref.num = num; + tok->u.backref.refs = backs; + } } } else @@ -3328,16 +3521,18 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'g': if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { PFETCH(c); - if (c == '<') { + if (c == '<' || c == '\'') { + int gnum; UChar* name_end; prev = p; - r = fetch_name(&p, end, &name_end, env, 1); + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); if (r < 0) return r; tok->type = TK_CALL; tok->u.call.name = prev; tok->u.call.name_end = name_end; + tok->u.call.gnum = gnum; } else PUNFETCH; @@ -3380,7 +3575,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.code = (OnigCodePoint )num; } else { /* string */ - p = tok->backp + enc_len(enc, tok->backp); + p = tok->backp + enclen(enc, tok->backp); } break; } @@ -3392,15 +3587,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) #ifdef USE_VARIABLE_META_CHARS if ((c != ONIG_INEFFECTIVE_META_CHAR) && IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { - if (c == MC_ANYCHAR(enc)) + if (c == MC_ANYCHAR(syn)) goto any_char; - else if (c == MC_ANYTIME(enc)) + else if (c == MC_ANYTIME(syn)) goto anytime; - else if (c == MC_ZERO_OR_ONE_TIME(enc)) + else if (c == MC_ZERO_OR_ONE_TIME(syn)) goto zero_or_one_time; - else if (c == MC_ONE_OR_MORE_TIME(enc)) + else if (c == MC_ONE_OR_MORE_TIME(syn)) goto one_or_more_time; - else if (c == MC_ANYCHAR_ANYTIME(enc)) { + else if (c == MC_ANYCHAR_ANYTIME(syn)) { tok->type = TK_ANYCHAR_ANYTIME; goto out; } @@ -3477,7 +3672,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) while (1) { if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; PFETCH(c); - if (c == MC_ESC(enc)) { + if (c == MC_ESC(syn)) { if (!PEND) PFETCH(c); } else { @@ -3519,7 +3714,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case ']': if (*src > env->pattern) /* /].../ is allowed. */ - CCEND_ESC_WARN(env, (UChar* )"]"); + CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); break; case '#': @@ -3553,24 +3748,36 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } static int -add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, - const OnigCodePoint sbr[], const OnigCodePoint mbr[]) +add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, + OnigEncoding enc ARG_UNUSED, + OnigCodePoint sb_out, const OnigCodePoint mbr[]) { int i, r; OnigCodePoint j; - int nsb = ONIGENC_CODE_RANGE_NUM(sbr); - int nmb = ONIGENC_CODE_RANGE_NUM(mbr); + int n = ONIGENC_CODE_RANGE_NUM(mbr); if (not == 0) { - for (i = 0; i < nsb; i++) { - for (j = ONIGENC_CODE_RANGE_FROM(sbr, i); - j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) { + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); + j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { + if (j >= sb_out) { + if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++; + else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), j, + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + i++; + } + + goto sb_end; + } BITSET_SET_BIT(cc->bs, j); } } - for (i = 0; i < nmb; i++) { + sb_end: + for ( ; i < n; i++) { r = add_code_range_to_buf(&(cc->mbuf), ONIGENC_CODE_RANGE_FROM(mbr, i), ONIGENC_CODE_RANGE_TO(mbr, i)); @@ -3580,24 +3787,24 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, else { OnigCodePoint prev = 0; - if (ONIGENC_MBC_MINLEN(enc) == 1) { - for (i = 0; i < nsb; i++) { - for (j = prev; - j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) { - BITSET_SET_BIT(cc->bs, j); - } - prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1; - } - if (prev < 0x7f) { - for (j = prev; j < 0x7f; j++) { - BITSET_SET_BIT(cc->bs, j); - } + for (i = 0; i < n; i++) { + for (j = prev; + j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { + if (j >= sb_out) { + goto sb_end2; + } + BITSET_SET_BIT(cc->bs, j); } - - prev = 0x80; + prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + } + for (j = prev; j < sb_out; j++) { + BITSET_SET_BIT(cc->bs, j); } - for (i = 0; i < nmb; i++) { + sb_end2: + prev = sb_out; + + for (i = 0; i < n; i++) { if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { r = add_code_range_to_buf(&(cc->mbuf), prev, ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); @@ -3618,12 +3825,13 @@ static int add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) { int c, r; - const OnigCodePoint *sbr, *mbr; + const OnigCodePoint *ranges; + OnigCodePoint sb_out; OnigEncoding enc = env->enc; - r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr); + r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); if (r == 0) { - return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr); + return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges); } else if (r != ONIG_NO_SUPPORT_CONFIG) { return r; @@ -3677,13 +3885,13 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) case ONIGENC_CTYPE_WORD: if (not == 0) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); + if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); } ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */ + if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ && ! ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); } @@ -3698,62 +3906,11 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) return r; } -static int -parse_ctype_to_enc_ctype(int pctype, int* not) -{ - int ctype; - - switch (pctype) { - case CTYPE_WORD: - ctype = ONIGENC_CTYPE_WORD; - *not = 0; - break; - case CTYPE_NOT_WORD: - ctype = ONIGENC_CTYPE_WORD; - *not = 1; - break; - case CTYPE_WHITE_SPACE: - ctype = ONIGENC_CTYPE_SPACE; - *not = 0; - break; - case CTYPE_NOT_WHITE_SPACE: - ctype = ONIGENC_CTYPE_SPACE; - *not = 1; - break; - case CTYPE_DIGIT: - ctype = ONIGENC_CTYPE_DIGIT; - *not = 0; - break; - case CTYPE_NOT_DIGIT: - ctype = ONIGENC_CTYPE_DIGIT; - *not = 1; - break; - case CTYPE_XDIGIT: - ctype = ONIGENC_CTYPE_XDIGIT; - *not = 0; - break; - case CTYPE_NOT_XDIGIT: - ctype = ONIGENC_CTYPE_XDIGIT; - *not = 1; - break; - default: - return ONIGERR_PARSER_BUG; - break; - } - return ctype; -} - -typedef struct { - UChar *name; - int ctype; - short int len; -} PosixBracketEntryType; - static int parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) { #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 -#define POSIX_BRACKET_NAME_MAX_LEN 6 +#define POSIX_BRACKET_NAME_MIN_LEN 4 static PosixBracketEntryType PBS[] = { { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, @@ -3769,7 +3926,8 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )NULL, -1, 0 } + { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 }, + { (UChar* )NULL, -1, 0 } }; PosixBracketEntryType *pb; @@ -3786,7 +3944,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) else not = 0; - if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2) + if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3) goto not_posix_bracket; for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { @@ -3820,86 +3978,39 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) } } - return 1; /* 1: is not POSIX bracket, but no error. */ -} - -static int -property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc) -{ - static PosixBracketEntryType PBS[] = { - { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, - { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, - { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, - { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, - { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, - { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, - { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, - { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, - { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )NULL, -1, 0 } - }; - - PosixBracketEntryType *pb; - int len; - - len = onigenc_strlen(enc, p, end); - for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { - if (len == pb->len && - onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) - return pb->ctype; - } - - return -1; + return 1; /* 1: is not POSIX bracket, but no error. */ } static int fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) { - int ctype; + int r; OnigCodePoint c; OnigEncoding enc = env->enc; UChar *prev, *start, *p = *src; PFETCH_READY; - /* 'IsXXXX' => 'XXXX' */ - if (!PEND && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) { - c = PPEEK; - if (c == 'I') { - PINC; - if (! PEND) { - c = PPEEK; - if (c == 's') - PINC; - else - PUNFETCH; - } - } - } - + r = 0; start = prev = p; while (!PEND) { prev = p; PFETCH(c); if (c == '}') { - ctype = property_name_to_ctype(start, prev, enc); - if (ctype < 0) break; + r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev); + if (r < 0) break; *src = p; - return ctype; + return r; } - else if (c == '(' || c == ')' || c == '{' || c == '|') + else if (c == '(' || c == ')' || c == '{' || c == '|') { + r = ONIGERR_INVALID_CHAR_PROPERTY_NAME; break; + } } - onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME, - *src, prev); - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + onig_scan_env_set_error_string(env, r, *src, prev); + return r; } static int @@ -3913,11 +4024,11 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, if (ctype < 0) return ctype; *np = node_new_cclass(); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - cc = &(NCCLASS(*np)); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); r = add_ctype_to_cc(cc, ctype, 0, env); if (r != 0) return r; - if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc); + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); return 0; } @@ -3981,7 +4092,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, if (intype == *type) { if (intype == CCV_SB) { if (*vs > 0xff || v > 0xff) - return ONIGERR_INVALID_WIDE_CHAR_VALUE; + return ONIGERR_INVALID_CODE_POINT_VALUE; if (*vs > v) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) @@ -4036,10 +4147,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, static int code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, - OnigEncoding enc) + ScanEnv* env) { int in_esc; OnigCodePoint code; + OnigEncoding enc = env->enc; UChar* p = from; PFETCH_READY; @@ -4051,7 +4163,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, else { PFETCH(code); if (code == c) return 1; - if (code == MC_ESC(enc)) in_esc = 1; + if (code == MC_ESC(env->syntax)) in_esc = 1; } } return 0; @@ -4086,7 +4198,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (r < 0) return r; if (r == TK_CC_CLOSE) { if (! code_exist_check((OnigCodePoint )']', - *src, env->pattern_end, 1, env->enc)) + *src, env->pattern_end, 1, env)) return ONIGERR_EMPTY_CHAR_CLASS; CC_ESC_WARN(env, (UChar* )"]"); @@ -4094,8 +4206,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } *np = node = node_new_cclass(); - CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); - cc = &(NCCLASS(node)); + CHECK_NULL_RETURN_MEMERR(node); + cc = NCCLASS(node); and_start = 0; state = CCS_START; @@ -4108,6 +4220,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (len > 1) { in_type = CCV_CODE_POINT; } + else if (len < 0) { + r = len; + goto err; + } else { sb_char: in_type = CCV_SB; @@ -4141,7 +4257,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, goto err; } - len = enc_len(env->enc, buf); + len = enclen(env->enc, buf); if (i < len) { r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; goto err; @@ -4202,12 +4318,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, break; case TK_CHAR_TYPE: - { - int ctype, not; - ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); - r = add_ctype_to_cc(cc, ctype, not, env); - if (r != 0) return r; - } + r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env); + if (r != 0) return r; next_class: r = next_state_class(cc, &vs, &val_type, &state, env); @@ -4287,7 +4399,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = parse_char_class(&anode, tok, &p, end, env); if (r != 0) goto cc_open_err; - acc = &(NCCLASS(anode)); + acc = NCCLASS(anode); r = or_cclass(cc, acc, env->enc); onig_node_free(anode); @@ -4352,10 +4464,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } if (neg != 0) - CCLASS_SET_NOT(cc); + NCCLASS_SET_NOT(cc); else - CCLASS_CLEAR_NOT(cc); - if (IS_CCLASS_NOT(cc) && + NCCLASS_CLEAR_NOT(cc); + if (IS_NCCLASS_NOT(cc) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { int is_empty; @@ -4378,7 +4490,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, return 0; err: - if (cc != &(NCCLASS(*np))) + if (cc != NCCLASS(*np)) bbuf_free(cc->mbuf); onig_node_free(*np); return r; @@ -4388,15 +4500,19 @@ static int parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env); static int -parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) +parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) { int r, num; - int list_capture; Node *target; OnigOptionType option; - OnigEncoding enc = env->enc; OnigCodePoint c; + OnigEncoding enc = env->enc; + +#ifdef USE_NAMED_GROUP + int list_capture; +#endif + UChar* p = *src; PFETCH_READY; @@ -4428,9 +4544,19 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); break; case '>': /* (?>...) stop backtrack */ - *np = node_new_effect(EFFECT_STOP_BACKTRACK); + *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); break; +#ifdef USE_NAMED_GROUP + case '\'': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + goto named_group1; + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; +#endif + case '<': /* look behind (?<=...), (?syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - UChar *name; - UChar *name_end; + else { + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + UChar *name; + UChar *name_end; - PUNFETCH; - list_capture = 0; + PUNFETCH; + c = '<'; - named_group: - name = p; - r = fetch_name(&p, end, &name_end, env, 0); - if (r < 0) return r; + named_group1: + list_capture = 0; - num = scan_env_add_mem_entry(env); - if (num < 0) return num; - if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM) - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + named_group2: + name = p; + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); + if (r < 0) return r; - r = name_add(env->reg, name, name_end, num, env); - if (r != 0) return r; - *np = node_new_effect_memory(env->option, 1); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - NEFFECT(*np).regnum = num; - if (list_capture != 0) - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); - env->num_named++; + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + + r = name_add(env->reg, name, name_end, num, env); + if (r != 0) return r; + *np = node_new_enclose_memory(env->option, 1); + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->regnum = num; + if (list_capture != 0) + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + env->num_named++; + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } } -#endif - else +#else + else { return ONIGERR_UNDEFINED_GROUP_OPTION; + } +#endif break; case '@': @@ -4474,25 +4610,25 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, #ifdef USE_NAMED_GROUP if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { PFETCH(c); - if (c == '<') { + if (c == '<' || c == '\'') { list_capture = 1; - goto named_group; /* (?@...) */ + goto named_group2; /* (?@...) */ } PUNFETCH; } #endif - *np = node_new_effect_memory(env->option, 0); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *np = node_new_enclose_memory(env->option, 0); + CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) { onig_node_free(*np); return num; } - else if (num >= BIT_STATUS_BITS_NUM) { + else if (num >= (int )BIT_STATUS_BITS_NUM) { onig_node_free(*np); return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } - NEFFECT(*np).regnum = num; + NENCLOSE(*np)->regnum = num; BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); } else { @@ -4545,7 +4681,7 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (c == ')') { *np = node_new_option(option); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); *src = p; return 2; /* option only */ } @@ -4559,8 +4695,8 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, env->option = prev; if (r < 0) return r; *np = node_new_option(option); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - NEFFECT(*np).target = target; + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->target = target; *src = p; return 0; } @@ -4579,26 +4715,26 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; - *np = node_new_effect_memory(env->option, 0); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *np = node_new_enclose_memory(env->option, 0); + CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) return num; - NEFFECT(*np).regnum = num; + NENCLOSE(*np)->regnum = num; } - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); r = fetch_token(tok, &p, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, &p, end, env); if (r < 0) return r; - if (NTYPE(*np) == N_ANCHOR) - NANCHOR(*np).target = target; + if (NTYPE(*np) == NT_ANCHOR) + NANCHOR(*np)->target = target; else { - NEFFECT(*np).target = target; - if (NEFFECT(*np).type == EFFECT_MEMORY) { + NENCLOSE(*np)->target = target; + if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { /* Don't move this to previous of parse_subexp() */ - r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np); + r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np); if (r != 0) return r; } } @@ -4618,17 +4754,17 @@ static const char* ReduceQStr[] = { static int set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) { - QuantifierNode* qn; + QtfrNode* qn; - qn = &(NQUANTIFIER(qnode)); + qn = NQTFR(qnode); if (qn->lower == 1 && qn->upper == 1) { return 1; } switch (NTYPE(target)) { - case N_STRING: + case NT_STR: if (! group) { - StrNode* sn = &(NSTRING(target)); + StrNode* sn = NSTR(target); if (str_node_can_be_split(sn, env->enc)) { Node* n = str_node_split_last_char(sn, env->enc); if (IS_NOT_NULL(n)) { @@ -4639,10 +4775,10 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) } break; - case N_QUANTIFIER: + case NT_QTFR: { /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ - QuantifierNode* qnt = &(NQUANTIFIER(target)); + QtfrNode* qnt = NQTFR(target); int nestq_num = popular_quantifier_num(qn); int targetq_num = popular_quantifier_num(qnt); @@ -4705,6 +4841,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) return 0; } + #ifdef USE_SHARED_CCLASS_TABLE #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 @@ -4728,17 +4865,17 @@ static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) static int type_cclass_hash(type_cclass_key* key) { int i, val; - unsigned char *p; + UChar *p; val = 0; - p = (unsigned char* )&(key->enc); - for (i = 0; i < sizeof(key->enc); i++) { + p = (UChar* )&(key->enc); + for (i = 0; i < (int )sizeof(key->enc); i++) { val = val * 997 + (int )*p++; } - p = (unsigned char* )(&key->type); - for (i = 0; i < sizeof(key->type); i++) { + p = (UChar* )(&key->type); + for (i = 0; i < (int )sizeof(key->type); i++) { val = val * 997 + (int )*p++; } @@ -4755,10 +4892,10 @@ static st_table* OnigTypeCClassTable; static int -i_free_shared_class(type_cclass_key* key, Node* node, void* arg) +i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED) { if (IS_NOT_NULL(node)) { - CClassNode* cc = &(NCCLASS(node)); + CClassNode* cc = NCCLASS(node); if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); xfree(node); } @@ -4782,6 +4919,118 @@ onig_free_shared_cclass_table(void) #endif /* USE_SHARED_CCLASS_TABLE */ +#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS +static int +clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) +{ + BBuf *tbuf; + int r; + + if (IS_NCCLASS_NOT(cc)) { + bitset_invert(cc->bs); + + if (! ONIGENC_IS_SINGLEBYTE(enc)) { + r = not_code_range_buf(enc, cc->mbuf, &tbuf); + if (r != 0) return r; + + bbuf_free(cc->mbuf); + cc->mbuf = tbuf; + } + + NCCLASS_CLEAR_NOT(cc); + } + + return 0; +} +#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ + +typedef struct { + ScanEnv* env; + CClassNode* cc; + Node* alt_root; + Node** ptail; +} IApplyCaseFoldArg; + +static int +i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], + int to_len, void* arg) +{ + IApplyCaseFoldArg* iarg; + ScanEnv* env; + CClassNode* cc; + BitSetRef bs; + + iarg = (IApplyCaseFoldArg* )arg; + env = iarg->env; + cc = iarg->cc; + bs = cc->bs; + + if (to_len == 1) { + int is_in = onig_is_code_in_cc(env->enc, from, cc); +#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || + (is_in == 0 && IS_NCCLASS_NOT(cc))) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + add_code_range(&(cc->mbuf), env, *to, *to); + } + else { + BITSET_SET_BIT(bs, *to); + } + } +#else + if (is_in != 0) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); + add_code_range(&(cc->mbuf), env, *to, *to); + } + else { + if (IS_NCCLASS_NOT(cc)) { + BITSET_CLEAR_BIT(bs, *to); + } + else + BITSET_SET_BIT(bs, *to); + } + } +#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ + } + else { + int r, i, len; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + Node *snode = NULL_NODE; + + if (onig_is_code_in_cc(env->enc, from, cc) +#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + && !IS_NCCLASS_NOT(cc) +#endif + ) { + for (i = 0; i < to_len; i++) { + len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); + if (i == 0) { + snode = onig_node_new_str(buf, buf + len); + CHECK_NULL_RETURN_MEMERR(snode); + + /* char-class expanded multi-char only + compare with string folded at match time. */ + NSTRING_SET_AMBIG(snode); + } + else { + r = onig_node_str_cat(snode, buf, buf + len); + if (r < 0) { + onig_node_free(snode); + return r; + } + } + } + + *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); + CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); + iarg->ptail = &(NCDR((*(iarg->ptail)))); + } + } + + return 0; +} + static int parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) @@ -4791,7 +5040,7 @@ parse_exp(Node** np, OnigToken* tok, int term, Node** targetp; *np = NULL; - if (tok->type == term) + if (tok->type == (enum TokenSyms )term) goto end_of_token; switch (tok->type) { @@ -4803,20 +5052,20 @@ parse_exp(Node** np, OnigToken* tok, int term, break; case TK_SUBEXP_OPEN: - r = parse_effect(np, tok, TK_SUBEXP_CLOSE, src, end, env); + r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env); if (r < 0) return r; if (r == 1) group = 1; else if (r == 2) { /* option only */ Node* target; OnigOptionType prev = env->option; - env->option = NEFFECT(*np).option; + env->option = NENCLOSE(*np)->option; r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, src, end, env); env->option = prev; if (r < 0) return r; - NEFFECT(*np).target = target; + NENCLOSE(*np)->target = target; return tok->type; } break; @@ -4833,7 +5082,7 @@ parse_exp(Node** np, OnigToken* tok, int term, tk_byte: { *np = node_new_str(tok->backp, *src); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); while (1) { r = fetch_token(tok, src, end, env); @@ -4853,13 +5102,14 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_RAW_BYTE: tk_raw_byte: { - *np = node_new_str_char((UChar )tok->u.c); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *np = node_new_str_raw_char((UChar )tok->u.c); + CHECK_NULL_RETURN_MEMERR(*np); len = 1; while (1) { if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enc_len(env->enc, NSTRING(*np).s)) { + if (len == enclen(env->enc, NSTR(*np)->s)) { r = fetch_token(tok, src, end, env); + NSTRING_CLEAR_RAW(*np); goto string_end; } } @@ -4867,12 +5117,14 @@ parse_exp(Node** np, OnigToken* tok, int term, r = fetch_token(tok, src, end, env); if (r < 0) return r; if (r != TK_RAW_BYTE) { + /* Don't use this, it is wrong for little endian encodings. */ #ifdef USE_PAD_TO_SHORT_BYTE_CHAR int rem; if (len < ONIGENC_MBC_MINLEN(env->enc)) { rem = ONIGENC_MBC_MINLEN(env->enc) - len; - (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0); - if (len + rem == enc_len(env->enc, NSTRING(*np).s)) { + (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); + if (len + rem == enclen(env->enc, NSTR(*np)->s)) { + NSTRING_CLEAR_RAW(*np); goto string_end; } } @@ -4898,7 +5150,7 @@ parse_exp(Node** np, OnigToken* tok, int term, #else *np = node_new_str(buf, buf + num); #endif - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); } break; @@ -4907,7 +5159,7 @@ parse_exp(Node** np, OnigToken* tok, int term, OnigCodePoint end_op[2]; UChar *qstart, *qend, *nextp; - end_op[0] = (OnigCodePoint )MC_ESC(env->enc); + end_op[0] = (OnigCodePoint )MC_ESC(env->syntax); end_op[1] = (OnigCodePoint )'E'; qstart = *src; qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc); @@ -4915,35 +5167,31 @@ parse_exp(Node** np, OnigToken* tok, int term, nextp = qend = end; } *np = node_new_str(qstart, qend); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); *src = nextp; } break; case TK_CHAR_TYPE: { - switch (tok->u.subtype) { - case CTYPE_WORD: - case CTYPE_NOT_WORD: - *np = node_new_ctype(tok->u.subtype); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + switch (tok->u.prop.ctype) { + case ONIGENC_CTYPE_WORD: + *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not); + CHECK_NULL_RETURN_MEMERR(*np); break; - case CTYPE_WHITE_SPACE: - case CTYPE_NOT_WHITE_SPACE: - case CTYPE_DIGIT: - case CTYPE_NOT_DIGIT: - case CTYPE_XDIGIT: - case CTYPE_NOT_XDIGIT: + case ONIGENC_CTYPE_SPACE: + case ONIGENC_CTYPE_DIGIT: + case ONIGENC_CTYPE_XDIGIT: { CClassNode* cc; - int ctype, not; #ifdef USE_SHARED_CCLASS_TABLE - const OnigCodePoint *sbr, *mbr; + const OnigCodePoint *mbr; + OnigCodePoint sb_out; - ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); - r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr); + r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype, + &sb_out, &mbr); if (r == 0 && ONIGENC_CODE_RANGE_NUM(mbr) >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { @@ -4951,8 +5199,8 @@ parse_exp(Node** np, OnigToken* tok, int term, type_cclass_key* new_key; key.enc = env->enc; - key.not = not; - key.type = ctype; + key.not = tok->u.prop.not; + key.type = tok->u.prop.ctype; THREAD_ATOMIC_START; @@ -4972,14 +5220,17 @@ parse_exp(Node** np, OnigToken* tok, int term, } } - *np = node_new_cclass_by_codepoint_range(not, sbr, mbr); + *np = node_new_cclass_by_codepoint_range(tok->u.prop.not, + sb_out, mbr); if (IS_NULL(*np)) { THREAD_ATOMIC_END; return ONIGERR_MEMORY; } - CCLASS_SET_SHARE(&(NCCLASS(*np))); + cc = NCCLASS(*np); + NCCLASS_SET_SHARE(cc); new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); + xmemcpy(new_key, &key, sizeof(type_cclass_key)); onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, (st_data_t )*np); @@ -4987,12 +5238,11 @@ parse_exp(Node** np, OnigToken* tok, int term, } else { #endif - ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); *np = node_new_cclass(); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - cc = &(NCCLASS(*np)); - add_ctype_to_cc(cc, ctype, 0, env); - if (not != 0) CCLASS_SET_NOT(cc); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); + add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); #ifdef USE_SHARED_CCLASS_TABLE } #endif @@ -5018,55 +5268,44 @@ parse_exp(Node** np, OnigToken* tok, int term, r = parse_char_class(np, tok, src, end, env); if (r != 0) return r; - cc = &(NCCLASS(*np)); - + cc = NCCLASS(*np); if (IS_IGNORECASE(env->option)) { - int i, n, in_cc; - const OnigPairAmbigCodes* ccs; - BitSetRef bs = cc->bs; - OnigAmbigType amb; - - for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { - if ((amb & env->ambig_flag) == 0) continue; - - n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs); - for (i = 0; i < n; i++) { - in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc); - - if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) || - (in_cc == 0 && IS_CCLASS_NOT(cc))) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || - ccs[i].from >= SINGLE_BYTE_SIZE) { - /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */ - add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to); - } - else { - if (BITSET_AT(bs, ccs[i].from)) { - /* /(?i:[^A-C])/.match("a") ==> fail. */ - BITSET_SET_BIT(bs, ccs[i].to); - } - if (BITSET_AT(bs, ccs[i].to)) { - BITSET_SET_BIT(bs, ccs[i].from); - } - } - } + IApplyCaseFoldArg iarg; + + iarg.env = env; + iarg.cc = cc; + iarg.alt_root = NULL_NODE; + iarg.ptail = &(iarg.alt_root); + + r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, + i_apply_case_fold, &iarg); + if (r != 0) { + onig_node_free(iarg.alt_root); + return r; + } + if (IS_NOT_NULL(iarg.alt_root)) { + Node* work = onig_node_new_alt(*np, iarg.alt_root); + if (IS_NULL(work)) { + onig_node_free(iarg.alt_root); + return ONIGERR_MEMORY; } - } + *np = work; + } } } break; case TK_ANYCHAR: *np = node_new_anychar(); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); break; case TK_ANYCHAR_ANYTIME: *np = node_new_anychar(); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); qn = node_new_quantifier(0, REPEAT_INFINITE, 0); - CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); - NQUANTIFIER(qn).target = *np; + CHECK_NULL_RETURN_MEMERR(qn); + NQTFR(qn)->target = *np; *np = qn; break; @@ -5075,19 +5314,28 @@ parse_exp(Node** np, OnigToken* tok, int term, *np = node_new_backref(len, (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), tok->u.backref.by_name, -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL tok->u.backref.exist_level, tok->u.backref.level, #endif env); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); break; #ifdef USE_SUBEXP_CALL case TK_CALL: - *np = node_new_call(tok->u.call.name, tok->u.call.name_end); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - env->num_call++; + { + int gnum = tok->u.call.gnum; + + if (gnum < 0) { + gnum = BACKREF_REL_TO_ABS(gnum, env); + if (gnum <= 0) + return ONIGERR_INVALID_BACKREF; + } + *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); + CHECK_NULL_RETURN_MEMERR(*np); + env->num_call++; + } break; #endif @@ -5126,31 +5374,46 @@ parse_exp(Node** np, OnigToken* tok, int term, return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, - (r == TK_INTERVAL ? 1 : 0)); - CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); - NQUANTIFIER(qn).greedy = tok->u.repeat.greedy; + (r == TK_INTERVAL ? 1 : 0)); + CHECK_NULL_RETURN_MEMERR(qn); + NQTFR(qn)->greedy = tok->u.repeat.greedy; r = set_quantifier(qn, *targetp, group, env); - if (r < 0) return r; - + if (r < 0) { + onig_node_free(qn); + return r; + } + if (tok->u.repeat.possessive != 0) { Node* en; - en = node_new_effect(EFFECT_STOP_BACKTRACK); - CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); - NEFFECT(en).target = qn; + en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + if (IS_NULL(en)) { + onig_node_free(qn); + return ONIGERR_MEMORY; + } + NENCLOSE(en)->target = qn; qn = en; } if (r == 0) { *targetp = qn; } + else if (r == 1) { + onig_node_free(qn); + } else if (r == 2) { /* split case: /abc+/ */ Node *tmp; *targetp = node_new_list(*targetp, NULL); - CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY); - tmp = NCONS(*targetp).right = node_new_list(qn, NULL); - CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY); - targetp = &(NCONS(tmp).left); + if (IS_NULL(*targetp)) { + onig_node_free(qn); + return ONIGERR_MEMORY; + } + tmp = NCDR(*targetp) = node_new_list(qn, NULL); + if (IS_NULL(tmp)) { + onig_node_free(qn); + return ONIGERR_MEMORY; + } + targetp = &(NCAR(tmp)); } goto re_entry; } @@ -5175,19 +5438,19 @@ parse_branch(Node** top, OnigToken* tok, int term, } else { *top = node_new_list(node, NULL); - headp = &(NCONS(*top).right); + headp = &(NCDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); if (r < 0) return r; - if (NTYPE(node) == N_LIST) { + if (NTYPE(node) == NT_LIST) { *headp = node; - while (IS_NOT_NULL(NCONS(node).right)) node = NCONS(node).right; - headp = &(NCONS(node).right); + while (IS_NOT_NULL(NCDR(node))) node = NCDR(node); + headp = &(NCDR(node)); } else { *headp = node_new_list(node, NULL); - headp = &(NCONS(*headp).right); + headp = &(NCDR(*headp)); } } } @@ -5214,19 +5477,19 @@ parse_subexp(Node** top, OnigToken* tok, int term, *top = node; } else if (r == TK_ALT) { - *top = node_new_alt(node, NULL); - headp = &(NCONS(*top).right); + *top = onig_node_new_alt(node, NULL); + headp = &(NCDR(*top)); while (r == TK_ALT) { r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_branch(&node, tok, term, src, end, env); if (r < 0) return r; - *headp = node_new_alt(node, NULL); - headp = &(NCONS(*headp).right); + *headp = onig_node_new_alt(node, NULL); + headp = &(NCDR(*headp)); } - if (tok->type != term) + if (tok->type != (enum TokenSyms )term) goto err; } else { @@ -5254,8 +5517,8 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) } extern int -onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, - ScanEnv* env) +onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, + regex_t* reg, ScanEnv* env) { int r; UChar* p; @@ -5265,13 +5528,13 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_ #endif scan_env_clear(env); - env->option = reg->options; - env->ambig_flag = reg->ambig_flag; - env->enc = reg->enc; - env->syntax = reg->syntax; - env->pattern = (UChar* )pattern; - env->pattern_end = (UChar* )end; - env->reg = reg; + env->option = reg->options; + env->case_fold_flag = reg->case_fold_flag; + env->enc = reg->enc; + env->syntax = reg->syntax; + env->pattern = (UChar* )pattern; + env->pattern_end = (UChar* )end; + env->reg = reg; *root = NULL; p = (UChar* )pattern; @@ -5281,7 +5544,7 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_ } extern void -onig_scan_env_set_error_string(ScanEnv* env, int ecode, +onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED, UChar* arg, UChar* arg_end) { env->error = arg; diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h index b25618a33f..0c5c2c936c 100644 --- a/ext/mbstring/oniguruma/regparse.h +++ b/ext/mbstring/oniguruma/regparse.h @@ -32,47 +32,61 @@ #include "regint.h" /* node type */ -#define N_STRING (1<< 0) -#define N_CCLASS (1<< 1) -#define N_CTYPE (1<< 2) -#define N_ANYCHAR (1<< 3) -#define N_BACKREF (1<< 4) -#define N_QUANTIFIER (1<< 5) -#define N_EFFECT (1<< 6) -#define N_ANCHOR (1<< 7) -#define N_LIST (1<< 8) -#define N_ALT (1<< 9) -#define N_CALL (1<<10) +#define NT_STR 0 +#define NT_CCLASS 1 +#define NT_CTYPE 2 +#define NT_CANY 3 +#define NT_BREF 4 +#define NT_QTFR 5 +#define NT_ENCLOSE 6 +#define NT_ANCHOR 7 +#define NT_LIST 8 +#define NT_ALT 9 +#define NT_CALL 10 + +/* node type bit */ +#define NTYPE2BIT(type) (1<<(type)) + +#define BIT_NT_STR NTYPE2BIT(NT_STR) +#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS) +#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE) +#define BIT_NT_CANY NTYPE2BIT(NT_CANY) +#define BIT_NT_BREF NTYPE2BIT(NT_BREF) +#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR) +#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE) +#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR) +#define BIT_NT_LIST NTYPE2BIT(NT_LIST) +#define BIT_NT_ALT NTYPE2BIT(NT_ALT) +#define BIT_NT_CALL NTYPE2BIT(NT_CALL) #define IS_NODE_TYPE_SIMPLE(type) \ - (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0) - -#define NTYPE(node) ((node)->type) -#define NCONS(node) ((node)->u.cons) -#define NSTRING(node) ((node)->u.str) -#define NCCLASS(node) ((node)->u.cclass) -#define NCTYPE(node) ((node)->u.ctype) -#define NQUANTIFIER(node) ((node)->u.quantifier) -#define NANCHOR(node) ((node)->u.anchor) -#define NBACKREF(node) ((node)->u.backref) -#define NEFFECT(node) ((node)->u.effect) -#define NCALL(node) ((node)->u.call) - -#define CTYPE_WORD (1<<0) -#define CTYPE_NOT_WORD (1<<1) -#define CTYPE_WHITE_SPACE (1<<2) -#define CTYPE_NOT_WHITE_SPACE (1<<3) -#define CTYPE_DIGIT (1<<4) -#define CTYPE_NOT_DIGIT (1<<5) -#define CTYPE_XDIGIT (1<<6) -#define CTYPE_NOT_XDIGIT (1<<7) + ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\ + BIT_NT_CANY | BIT_NT_BREF)) != 0) + +#define NTYPE(node) ((node)->u.base.type) +#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype) + +#define NSTR(node) (&((node)->u.str)) +#define NCCLASS(node) (&((node)->u.cclass)) +#define NCTYPE(node) (&((node)->u.ctype)) +#define NBREF(node) (&((node)->u.bref)) +#define NQTFR(node) (&((node)->u.qtfr)) +#define NENCLOSE(node) (&((node)->u.enclose)) +#define NANCHOR(node) (&((node)->u.anchor)) +#define NCONS(node) (&((node)->u.cons)) +#define NCALL(node) (&((node)->u.call)) + +#define NCAR(node) (NCONS(node)->car) +#define NCDR(node) (NCONS(node)->cdr) + + #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) -#define EFFECT_MEMORY (1<<0) -#define EFFECT_OPTION (1<<1) -#define EFFECT_STOP_BACKTRACK (1<<2) +#define ENCLOSE_MEMORY (1<<0) +#define ENCLOSE_OPTION (1<<1) +#define ENCLOSE_STOP_BACKTRACK (1<<2) #define NODE_STR_MARGIN 16 #define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ @@ -80,17 +94,18 @@ #define NSTR_RAW (1<<0) /* by backslashed number */ #define NSTR_AMBIG (1<<1) -#define NSTR_AMBIG_REDUCE (1<<2) +#define NSTR_DONT_GET_OPT_INFO (1<<2) #define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) #define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW #define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW #define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG -#define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE +#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ + (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO #define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0) #define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0) -#define NSTRING_IS_AMBIG_REDUCE(node) \ - (((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0) +#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ + (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0) #define BACKREFS_P(br) \ (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); @@ -100,39 +115,6 @@ #define NQ_TARGET_IS_EMPTY_MEM 2 #define NQ_TARGET_IS_EMPTY_REC 3 - -typedef struct { - UChar* s; - UChar* end; - unsigned int flag; - int capa; /* (allocated size - 1) or 0: use buf[] */ - UChar buf[NODE_STR_BUF_SIZE]; -} StrNode; - -/* move to regint.h */ -#if 0 -typedef struct { - int flags; - BitSet bs; - BBuf* mbuf; /* multi-byte info or NULL */ -} CClassNode; -#endif - -typedef struct { - int state; - struct _Node* target; - int lower; - int upper; - int greedy; - int target_empty_info; - struct _Node* head_exact; - struct _Node* next_head_exact; - int is_refered; /* include called node. don't eliminate even if {0} */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ -#endif -} QuantifierNode; - /* status bits */ #define NST_MIN_FIXED (1<<0) #define NST_MAX_FIXED (1<<1) @@ -150,105 +132,142 @@ typedef struct { #define NST_NEST_LEVEL (1<<13) #define NST_BY_NUMBER (1<<14) /* {n,m} */ -#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f) -#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f) - -#define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0) -#define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) -#define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0) -#define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0) -#define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0) -#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) -#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) -#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) -#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \ +#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f) +#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f) + +#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0) +#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) +#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0) +#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0) +#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0) +#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) +#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) +#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) +#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \ (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) -#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) +#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) #define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) #define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) -#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) -#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) +#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) +#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) + +#define CALLNODE_REFNUM_UNDEF -1 typedef struct { + NodeBase base; + UChar* s; + UChar* end; + unsigned int flag; + int capa; /* (allocated size - 1) or 0: use buf[] */ + UChar buf[NODE_STR_BUF_SIZE]; +} StrNode; + +typedef struct { + NodeBase base; + int state; + struct _Node* target; + int lower; + int upper; + int greedy; + int target_empty_info; + struct _Node* head_exact; + struct _Node* next_head_exact; + int is_refered; /* include called node. don't eliminate even if {0} */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ +#endif +} QtfrNode; + +typedef struct { + NodeBase base; int state; int type; int regnum; OnigOptionType option; - struct _Node* target; - AbsAddrType call_addr; + struct _Node* target; + AbsAddrType call_addr; /* for multiple call reference */ OnigDistance min_len; /* min length (byte) */ OnigDistance max_len; /* max length (byte) */ - int char_len; /* character length */ - int opt_count; /* referenced count in optimize_node_left() */ -} EffectNode; - -#define CALLNODE_REFNUM_UNDEF -1 + int char_len; /* character length */ + int opt_count; /* referenced count in optimize_node_left() */ +} EncloseNode; #ifdef USE_SUBEXP_CALL typedef struct { - int offset; + int offset; struct _Node* target; } UnsetAddr; typedef struct { - int num; - int alloc; + int num; + int alloc; UnsetAddr* us; } UnsetAddrList; typedef struct { + NodeBase base; int state; - int ref_num; + int group_num; UChar* name; UChar* name_end; - struct _Node* target; /* EffectNode : EFFECT_MEMORY */ + struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */ UnsetAddrList* unset_addr_list; } CallNode; #endif typedef struct { - int state; - int back_num; - int back_static[NODE_BACKREFS_SIZE]; - int* back_dynamic; - int nest_level; -} BackrefNode; + NodeBase base; + int state; + int back_num; + int back_static[NODE_BACKREFS_SIZE]; + int* back_dynamic; + int nest_level; +} BRefNode; typedef struct { + NodeBase base; int type; struct _Node* target; int char_len; } AnchorNode; +typedef struct { + NodeBase base; + struct _Node* car; + struct _Node* cdr; +} ConsAltNode; + +typedef struct { + NodeBase base; + int ctype; + int not; +} CtypeNode; + typedef struct _Node { - int type; union { - StrNode str; - CClassNode cclass; - QuantifierNode quantifier; - EffectNode effect; + NodeBase base; + StrNode str; + CClassNode cclass; + QtfrNode qtfr; + EncloseNode enclose; + BRefNode bref; + AnchorNode anchor; + ConsAltNode cons; + CtypeNode ctype; #ifdef USE_SUBEXP_CALL - CallNode call; + CallNode call; #endif - BackrefNode backref; - AnchorNode anchor; - struct { - struct _Node* left; - struct _Node* right; - } cons; - struct { - int type; - } ctype; } u; } Node; + #define NULL_NODE ((Node* )0) #define SCANENV_MEMNODES_SIZE 8 @@ -257,30 +276,30 @@ typedef struct _Node { (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) typedef struct { - OnigOptionType option; - OnigAmbigType ambig_flag; - OnigEncoding enc; - OnigSyntaxType* syntax; - BitStatusType capture_history; - BitStatusType bt_mem_start; - BitStatusType bt_mem_end; - BitStatusType backrefed_mem; - UChar* pattern; - UChar* pattern_end; - UChar* error; - UChar* error_end; - regex_t* reg; /* for reg->names only */ - int num_call; + OnigOptionType option; + OnigCaseFoldType case_fold_flag; + OnigEncoding enc; + OnigSyntaxType* syntax; + BitStatusType capture_history; + BitStatusType bt_mem_start; + BitStatusType bt_mem_end; + BitStatusType backrefed_mem; + UChar* pattern; + UChar* pattern_end; + UChar* error; + UChar* error_end; + regex_t* reg; /* for reg->names only */ + int num_call; #ifdef USE_SUBEXP_CALL - UnsetAddrList* unset_addr_list; + UnsetAddrList* unset_addr_list; #endif - int num_mem; + int num_mem; #ifdef USE_NAMED_GROUP - int num_named; + int num_named; #endif - int mem_alloc; - Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; - Node** mem_nodes_dynamic; + int mem_alloc; + Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; + Node** mem_nodes_dynamic; #ifdef USE_COMBINATION_EXPLOSION_CHECK int num_comb_exp_check; int comb_exp_max_regnum; @@ -294,7 +313,6 @@ typedef struct { #define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) #define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) - #ifdef USE_NAMED_GROUP typedef struct { int new_val; @@ -304,20 +322,25 @@ extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); #endif extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); +extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end)); extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); extern void onig_node_conv_to_str_node P_((Node* node, int raw)); extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); +extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); extern void onig_node_free P_((Node* node)); -extern Node* onig_node_new_effect P_((int type)); +extern Node* onig_node_new_enclose P_((int type)); extern Node* onig_node_new_anchor P_((int type)); extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); extern Node* onig_node_new_list P_((Node* left, Node* right)); +extern Node* onig_node_list_add P_((Node* list, Node* x)); +extern Node* onig_node_new_alt P_((Node* left, Node* right)); extern void onig_node_str_clear P_((Node* node)); extern int onig_free_node_list P_((void)); extern int onig_names_free P_((regex_t* reg)); extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); +extern int onig_free_shared_cclass_table P_((void)); #ifdef ONIG_DEBUG #ifdef USE_NAMED_GROUP diff --git a/ext/mbstring/oniguruma/regposerr.c b/ext/mbstring/oniguruma/regposerr.c index e54b5c4089..56f75abfc1 100644 --- a/ext/mbstring/oniguruma/regposerr.c +++ b/ext/mbstring/oniguruma/regposerr.c @@ -2,7 +2,7 @@ regposerr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,6 +36,12 @@ # include #endif +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif + static char* ESTRING[] = { NULL, "failed to match", /* REG_NOMATCH */ @@ -63,13 +69,15 @@ static char* ESTRING[] = { extern size_t -regerror(int posix_ecode, const regex_t* reg, char* buf, size_t size) +regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf, + size_t size) { char* s; char tbuf[35]; size_t len; - if (posix_ecode > 0 && posix_ecode < sizeof(ESTRING) / sizeof(ESTRING[0])) { + if (posix_ecode > 0 + && posix_ecode < (int )(sizeof(ESTRING) / sizeof(ESTRING[0]))) { s = ESTRING[posix_ecode]; } else if (posix_ecode == 0) { diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c index a3bacf722e..7d1857cf2d 100644 --- a/ext/mbstring/oniguruma/regposix.c +++ b/ext/mbstring/oniguruma/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -102,7 +102,7 @@ onig2posix_error_code(int code) { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT }, { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { ONIGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC }, { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT }, { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT }, { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT }, @@ -122,7 +122,7 @@ onig2posix_error_code(int code) if (code >= 0) return 0; - for (i = 0; i < sizeof(o2p) / sizeof(o2p[0]); i++) { + for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) { if (code == o2p[i].onig_err) return o2p[i].posix_err; } @@ -273,9 +273,9 @@ typedef struct { void* arg; } i_wrap; -static int i_wrapper(const unsigned char* name, const unsigned char* name_end, - int ng, int* gs, - onig_regex_t* reg, void* arg) +static int +i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs, + onig_regex_t* reg ARG_UNUSED, void* arg) { i_wrap* warg = (i_wrap* )arg; diff --git a/ext/mbstring/oniguruma/regsyntax.c b/ext/mbstring/oniguruma/regsyntax.c index 9114e39e6b..ade5b55f77 100644 --- a/ext/mbstring/oniguruma/regsyntax.c +++ b/ext/mbstring/oniguruma/regsyntax.c @@ -34,6 +34,15 @@ OnigSyntaxType OnigSyntaxASIS = { , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE , 0 , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; OnigSyntaxType OnigSyntaxPosixBasic = { @@ -42,6 +51,15 @@ OnigSyntaxType OnigSyntaxPosixBasic = { , 0 , 0 , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; OnigSyntaxType OnigSyntaxPosixExtended = { @@ -54,6 +72,15 @@ OnigSyntaxType OnigSyntaxPosixExtended = { ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; OnigSyntaxType OnigSyntaxEmacs = { @@ -66,6 +93,15 @@ OnigSyntaxType OnigSyntaxEmacs = { , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; OnigSyntaxType OnigSyntaxGrep = { @@ -79,6 +115,15 @@ OnigSyntaxType OnigSyntaxGrep = { , 0 , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; OnigSyntaxType OnigSyntaxGnuRegex = { @@ -86,6 +131,15 @@ OnigSyntaxType OnigSyntaxGnuRegex = { , 0 , SYN_GNU_REGEX_BV , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; OnigSyntaxType OnigSyntaxJava = { @@ -100,6 +154,15 @@ OnigSyntaxType OnigSyntaxJava = { ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) , ONIG_OPTION_SINGLELINE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; OnigSyntaxType OnigSyntaxPerl = { @@ -111,10 +174,18 @@ OnigSyntaxType OnigSyntaxPerl = { , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | - ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS ) + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) , SYN_GNU_REGEX_BV , ONIG_OPTION_SINGLELINE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; /* Perl + named group */ @@ -128,7 +199,6 @@ OnigSyntaxType OnigSyntaxPerl_NG = { ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | - ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) @@ -136,6 +206,15 @@ OnigSyntaxType OnigSyntaxPerl_NG = { ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) , ONIG_OPTION_SINGLELINE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } }; @@ -205,7 +284,7 @@ onig_get_syntax_options(OnigSyntaxType* syntax) } #ifdef USE_VARIABLE_META_CHARS -extern int onig_set_meta_char(OnigEncoding enc, +extern int onig_set_meta_char(OnigSyntaxType* enc, unsigned int what, OnigCodePoint code) { switch (what) { diff --git a/ext/mbstring/oniguruma/regversion.c b/ext/mbstring/oniguruma/regversion.c index 5fad0cc18c..113fbaedc6 100644 --- a/ext/mbstring/oniguruma/regversion.c +++ b/ext/mbstring/oniguruma/regversion.c @@ -2,7 +2,7 @@ regversion.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2008 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +27,7 @@ * SUCH DAMAGE. */ +#include "config.h" #include "oniguruma.h" #include @@ -47,7 +48,7 @@ onig_copyright(void) { static char s[58]; - sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako", + sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY); diff --git a/ext/mbstring/oniguruma/st.c b/ext/mbstring/oniguruma/st.c index 2324da2635..022880ae36 100644 --- a/ext/mbstring/oniguruma/st.c +++ b/ext/mbstring/oniguruma/st.c @@ -2,7 +2,6 @@ /* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ -#include "config.h" #include #include #include @@ -11,22 +10,7 @@ #include #endif -#ifdef NOT_RUBY #include "regint.h" -#else -#ifdef RUBY_PLATFORM -#define xmalloc ruby_xmalloc -#define xcalloc ruby_xcalloc -#define xrealloc ruby_xrealloc -#define xfree ruby_xfree - -void *xmalloc(long); -void *xcalloc(long, long); -void *xrealloc(void *, long); -void xfree(void *); -#endif -#endif - #include "st.h" typedef struct st_table_entry st_table_entry; @@ -467,8 +451,13 @@ st_delete_safe(table, key, value, never) } static int +#if defined(__GNUC__) +delete_never(st_data_t key __attribute__ ((unused)), st_data_t value, + st_data_t never) +#else delete_never(key, value, never) st_data_t key, value, never; +#endif { if (value == never) return ST_DELETE; return ST_CONTINUE; diff --git a/ext/mbstring/oniguruma/testc.c b/ext/mbstring/oniguruma/testc.c new file mode 100644 index 0000000000..6a8c77896d --- /dev/null +++ b/ext/mbstring/oniguruma/testc.c @@ -0,0 +1,863 @@ +/* + * This program was generated by testconv.rb. + */ +#include "config.h" +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#include + +#ifdef POSIX_TEST +#include "onigposix.h" +#else +#include "oniguruma.h" +#endif + +#ifdef HAVE_STRING_H +# include +#else +# include +#endif + +#define SLEN(s) strlen(s) + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +#ifndef POSIX_TEST +static OnigRegion* region; +#endif + +static void xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + +#ifdef POSIX_TEST + regex_t reg; + char buf[200]; + regmatch_t pmatch[25]; + + r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); + nfail++; + } + } + } + regfree(®); + +#else + regex_t* reg; + OnigErrorInfo einfo; + + r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_EUC_JP, ONIG_SYNTAX_DEFAULT, &einfo); + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)), + (UChar* )str, (UChar* )(str + SLEN(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, region->beg[mem], region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +#endif +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ + err_file = stdout; + +#ifdef POSIX_TEST + reg_set_encoding(REG_POSIX_ENCODING_EUC_JP); +#else + region = onig_region_new(); +#endif + + x2("", "", 0, 0); + x2("^", "", 0, 0); + x2("$", "", 0, 0); + x2("\\G", "", 0, 0); + x2("\\A", "", 0, 0); + x2("\\Z", "", 0, 0); + x2("\\z", "", 0, 0); + x2("^$", "", 0, 0); + x2("\\ca", "\001", 0, 1); + x2("\\C-b", "\002", 0, 1); + x2("\\c\\\\", "\034", 0, 1); + x2("q[\\c\\\\]", "q\034", 0, 2); + x2("", "a", 0, 0); + x2("a", "a", 0, 1); + x2("\\x61", "a", 0, 1); + x2("aa", "aa", 0, 2); + x2("aaa", "aaa", 0, 3); + x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); + x2("ab", "ab", 0, 2); + x2("b", "ab", 1, 2); + x2("bc", "abc", 1, 3); + x2("(?i:#RET#)", "#INS##RET#", 5, 10); + x2("\\17", "\017", 0, 1); + x2("\\x1f", "\x1f", 0, 1); + x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); + x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); + x2(".", "a", 0, 1); + n(".", ""); + x2("..", "ab", 0, 2); + x2("\\w", "e", 0, 1); + n("\\W", "e"); + x2("\\s", " ", 0, 1); + x2("\\S", "b", 0, 1); + x2("\\d", "4", 0, 1); + n("\\D", "4"); + x2("\\b", "z ", 0, 0); + x2("\\b", " z", 1, 1); + x2("\\B", "zz ", 1, 1); + x2("\\B", "z ", 2, 2); + x2("\\B", " z", 0, 0); + x2("[ab]", "b", 0, 1); + n("[ab]", "c"); + x2("[a-z]", "t", 0, 1); + n("[^a]", "a"); + x2("[^a]", "\n", 0, 1); + x2("[]]", "]", 0, 1); + n("[^]]", "]"); + x2("[\\^]+", "0^^1", 1, 3); + x2("[b-]", "b", 0, 1); + x2("[b-]", "-", 0, 1); + x2("[\\w]", "z", 0, 1); + n("[\\w]", " "); + x2("[\\W]", "b$", 1, 2); + x2("[\\d]", "5", 0, 1); + n("[\\d]", "e"); + x2("[\\D]", "t", 0, 1); + n("[\\D]", "3"); + x2("[\\s]", " ", 0, 1); + n("[\\s]", "a"); + x2("[\\S]", "b", 0, 1); + n("[\\S]", " "); + x2("[\\w\\d]", "2", 0, 1); + n("[\\w\\d]", " "); + x2("[[:upper:]]", "B", 0, 1); + x2("[*[:xdigit:]+]", "+", 0, 1); + x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); + x2("[*[:xdigit:]+]", "-@^+", 3, 4); + n("[[:upper]]", "A"); + x2("[[:upper]]", ":", 0, 1); + x2("[\\044-\\047]", "\046", 0, 1); + x2("[\\x5a-\\x5c]", "\x5b", 0, 1); + x2("[\\x6A-\\x6D]", "\x6c", 0, 1); + n("[\\x6A-\\x6D]", "\x6E"); + n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); + x2("[\\[]", "[", 0, 1); + x2("[\\]]", "]", 0, 1); + x2("[&]", "&", 0, 1); + x2("[[ab]]", "b", 0, 1); + x2("[[ab]c]", "c", 0, 1); + n("[[^a]]", "a"); + n("[^[a]]", "a"); + x2("[[ab]&&bc]", "b", 0, 1); + n("[[ab]&&bc]", "a"); + n("[[ab]&&bc]", "c"); + x2("[a-z&&b-y&&c-x]", "w", 0, 1); + n("[^a-z&&b-y&&c-x]", "w"); + x2("[[^a&&a]&&a-z]", "b", 0, 1); + n("[[^a&&a]&&a-z]", "a"); + x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); + n("[[^a-z&&bcdef]&&[^c-g]]", "c"); + x2("[^[^abc]&&[^cde]]", "c", 0, 1); + x2("[^[^abc]&&[^cde]]", "e", 0, 1); + n("[^[^abc]&&[^cde]]", "f"); + x2("[a-&&-a]", "-", 0, 1); + n("[a\\-&&\\-a]", "&"); + n("\\wabc", " abc"); + x2("a\\Wbc", "a bc", 0, 4); + x2("a.b.c", "aabbc", 0, 5); + x2(".\\wb\\W..c", "abb bcc", 0, 7); + x2("\\s\\wzzz", " zzzz", 0, 5); + x2("aa.b", "aabb", 0, 4); + n(".a", "ab"); + x2(".a", "aa", 0, 2); + x2("^a", "a", 0, 1); + x2("^a$", "a", 0, 1); + x2("^\\w$", "a", 0, 1); + n("^\\w$", " "); + x2("^\\wab$", "zab", 0, 3); + x2("^\\wabcdef$", "zabcdef", 0, 7); + x2("^\\w...def$", "zabcdef", 0, 7); + x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); + x2("\\A\\Z", "", 0, 0); + x2("\\Axyz", "xyz", 0, 3); + x2("xyz\\Z", "xyz", 0, 3); + x2("xyz\\z", "xyz", 0, 3); + x2("a\\Z", "a", 0, 1); + x2("\\Gaz", "az", 0, 2); + n("\\Gz", "bza"); + n("az\\G", "az"); + n("az\\A", "az"); + n("a\\Az", "az"); + x2("\\^\\$", "^$", 0, 2); + x2("^x?y", "xy", 0, 2); + x2("^(x?y)", "xy", 0, 2); + x2("\\w", "_", 0, 1); + n("\\W", "_"); + x2("(?=z)z", "z", 0, 1); + n("(?=z).", "a"); + x2("(?!z)a", "a", 0, 1); + n("(?!z)a", "z"); + x2("(?i:a)", "a", 0, 1); + x2("(?i:a)", "A", 0, 1); + x2("(?i:A)", "a", 0, 1); + n("(?i:A)", "b"); + x2("(?i:[A-Z])", "a", 0, 1); + x2("(?i:[f-m])", "H", 0, 1); + x2("(?i:[f-m])", "h", 0, 1); + n("(?i:[f-m])", "e"); + x2("(?i:[A-c])", "D", 0, 1); + n("(?i:[^a-z])", "A"); + n("(?i:[^a-z])", "a"); + x2("(?i:[!-k])", "Z", 0, 1); + x2("(?i:[!-k])", "7", 0, 1); + x2("(?i:[T-}])", "b", 0, 1); + x2("(?i:[T-}])", "{", 0, 1); + x2("(?i:\\?a)", "?A", 0, 2); + x2("(?i:\\*A)", "*a", 0, 2); + n(".", "\n"); + x2("(?m:.)", "\n", 0, 1); + x2("(?m:a.)", "a\n", 0, 2); + x2("(?m:.b)", "a\nb", 1, 3); + x2(".*abc", "dddabdd\nddabc", 8, 13); + x2("(?m:.*abc)", "dddabddabc", 0, 10); + n("(?i)(?-i)a", "A"); + n("(?i)(?-i:a)", "A"); + x2("a?", "", 0, 0); + x2("a?", "b", 0, 0); + x2("a?", "a", 0, 1); + x2("a*", "", 0, 0); + x2("a*", "a", 0, 1); + x2("a*", "aaa", 0, 3); + x2("a*", "baaaa", 0, 0); + n("a+", ""); + x2("a+", "a", 0, 1); + x2("a+", "aaaa", 0, 4); + x2("a+", "aabbb", 0, 2); + x2("a+", "baaaa", 1, 5); + x2(".?", "", 0, 0); + x2(".?", "f", 0, 1); + x2(".?", "\n", 0, 0); + x2(".*", "", 0, 0); + x2(".*", "abcde", 0, 5); + x2(".+", "z", 0, 1); + x2(".+", "zdswer\n", 0, 6); + x2("(.*)a\\1f", "babfbac", 0, 4); + x2("(.*)a\\1f", "bacbabf", 3, 7); + x2("((.*)a\\2f)", "bacbabf", 3, 7); + x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); + x2("a|b", "a", 0, 1); + x2("a|b", "b", 0, 1); + x2("|a", "a", 0, 0); + x2("(|a)", "a", 0, 0); + x2("ab|bc", "ab", 0, 2); + x2("ab|bc", "bc", 0, 2); + x2("z(?:ab|bc)", "zbc", 0, 3); + x2("a(?:ab|bc)c", "aabc", 0, 4); + x2("ab|(?:ac|az)", "az", 0, 2); + x2("a|b|c", "dc", 1, 2); + x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); + n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); + x2("a|^z", "ba", 1, 2); + x2("a|^z", "za", 0, 1); + x2("a|\\Gz", "bza", 2, 3); + x2("a|\\Gz", "za", 0, 1); + x2("a|\\Az", "bza", 2, 3); + x2("a|\\Az", "za", 0, 1); + x2("a|b\\Z", "ba", 1, 2); + x2("a|b\\Z", "b", 0, 1); + x2("a|b\\z", "ba", 1, 2); + x2("a|b\\z", "b", 0, 1); + x2("\\w|\\s", " ", 0, 1); + n("\\w|\\w", " "); + x2("\\w|%", "%", 0, 1); + x2("\\w|[&$]", "&", 0, 1); + x2("[b-d]|[^e-z]", "a", 0, 1); + x2("(?:a|[c-f])|bz", "dz", 0, 1); + x2("(?:a|[c-f])|bz", "bz", 0, 2); + x2("abc|(?=zz)..f", "zzf", 0, 3); + x2("abc|(?!zz)..f", "abf", 0, 3); + x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); + n("(?>a|abd)c", "abdc"); + x2("(?>abd|a)c", "abdc", 0, 4); + x2("a?|b", "a", 0, 1); + x2("a?|b", "b", 0, 0); + x2("a?|b", "", 0, 0); + x2("a*|b", "aa", 0, 2); + x2("a*|b*", "ba", 0, 0); + x2("a*|b*", "ab", 0, 1); + x2("a+|b*", "", 0, 0); + x2("a+|b*", "bbb", 0, 3); + x2("a+|b*", "abbb", 0, 1); + n("a+|b+", ""); + x2("(a|b)?", "b", 0, 1); + x2("(a|b)*", "ba", 0, 2); + x2("(a|b)+", "bab", 0, 3); + x2("(ab|ca)+", "caabbc", 0, 4); + x2("(ab|ca)+", "aabca", 1, 5); + x2("(ab|ca)+", "abzca", 0, 2); + x2("(a|bab)+", "ababa", 0, 5); + x2("(a|bab)+", "ba", 1, 2); + x2("(a|bab)+", "baaaba", 1, 4); + x2("(?:a|b)(?:a|b)", "ab", 0, 2); + x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); + x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); + x2("(?:a+|b+){2}", "aaabbb", 0, 6); + x2("h{0,}", "hhhh", 0, 4); + x2("(?:a+|b+){1,2}", "aaabbb", 0, 6); + n("ax{2}*a", "0axxxa1"); + n("a.{0,2}a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXXa0"); + x2("^a{2,}?a$", "aaa", 0, 3); + x2("^[a-z]{2,}?$", "aaa", 0, 3); + x2("(?:a+|\\Ab*)cc", "cc", 0, 2); + n("(?:a+|\\Ab*)cc", "abcc"); + x2("(?:^a+|b+)*c", "aabbbabc", 6, 8); + x2("(?:^a+|b+)*c", "aabbbbc", 0, 7); + x2("a|(?i)c", "C", 0, 1); + x2("(?i)c|a", "C", 0, 1); + x2("(?i)c|a", "A", 0, 1); + x2("(?i:c)|a", "C", 0, 1); + n("(?i:c)|a", "A"); + x2("[abc]?", "abc", 0, 1); + x2("[abc]*", "abc", 0, 3); + x2("[^abc]*", "abc", 0, 0); + n("[^abc]+", "abc"); + x2("a?\?", "aaa", 0, 0); + x2("ba?\?b", "bab", 0, 3); + x2("a*?", "aaa", 0, 0); + x2("ba*?", "baa", 0, 1); + x2("ba*?b", "baab", 0, 4); + x2("a+?", "aaa", 0, 1); + x2("ba+?", "baa", 0, 2); + x2("ba+?b", "baab", 0, 4); + x2("(?:a?)?\?", "a", 0, 0); + x2("(?:a?\?)?", "a", 0, 0); + x2("(?:a?)+?", "aaa", 0, 1); + x2("(?:a+)?\?", "aaa", 0, 0); + x2("(?:a+)?\?b", "aaab", 0, 4); + x2("(?:ab)?{2}", "", 0, 0); + x2("(?:ab)?{2}", "ababa", 0, 4); + x2("(?:ab)*{0}", "ababa", 0, 0); + x2("(?:ab){3,}", "abababab", 0, 8); + n("(?:ab){3,}", "abab"); + x2("(?:ab){2,4}", "ababab", 0, 6); + x2("(?:ab){2,4}", "ababababab", 0, 8); + x2("(?:ab){2,4}?", "ababababab", 0, 4); + x2("(?:ab){,}", "ab{,}", 0, 5); + x2("(?:abc)+?{2}", "abcabcabc", 0, 6); + x2("(?:X*)(?i:xa)", "XXXa", 0, 4); + x2("(d+)([^abc]z)", "dddz", 0, 4); + x2("([^abc]*)([^abc]z)", "dddz", 0, 4); + x2("(\\w+)(\\wz)", "dddz", 0, 4); + x3("(a)", "a", 0, 1, 1); + x3("(ab)", "ab", 0, 2, 1); + x2("((ab))", "ab", 0, 2); + x3("((ab))", "ab", 0, 2, 1); + x3("((ab))", "ab", 0, 2, 2); + x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); + x3("(ab)(cd)", "abcd", 0, 2, 1); + x3("(ab)(cd)", "abcd", 2, 4, 2); + x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); + x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); + x2("(^a)", "a", 0, 1); + x3("(a)|(a)", "ba", 1, 2, 1); + x3("(^a)|(a)", "ba", 1, 2, 2); + x3("(a?)", "aaa", 0, 1, 1); + x3("(a*)", "aaa", 0, 3, 1); + x3("(a*)", "", 0, 0, 1); + x3("(a+)", "aaaaaaa", 0, 7, 1); + x3("(a+|b*)", "bbbaa", 0, 3, 1); + x3("(a+|b?)", "bbbaa", 0, 1, 1); + x3("(abc)?", "abc", 0, 3, 1); + x3("(abc)*", "abc", 0, 3, 1); + x3("(abc)+", "abc", 0, 3, 1); + x3("(xyz|abc)+", "abc", 0, 3, 1); + x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); + x3("((?i:abc))", "AbC", 0, 3, 1); + x2("(abc)(?i:\\1)", "abcABC", 0, 6); + x3("((?m:a.c))", "a\nc", 0, 3, 1); + x3("((?=az)a)", "azb", 0, 1, 1); + x3("abc|(.abd)", "zabd", 0, 4, 1); + x2("(?:abc)|(ABC)", "abc", 0, 3); + x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); + x3("a*(.)", "aaaaz", 4, 5, 1); + x3("a*?(.)", "aaaaz", 0, 1, 1); + x3("a*?(c)", "aaaac", 4, 5, 1); + x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); + x3("(\\Abb)cc", "bbcc", 0, 2, 1); + n("(\\Abb)cc", "zbbcc"); + x3("(^bb)cc", "bbcc", 0, 2, 1); + n("(^bb)cc", "zbbcc"); + x3("cc(bb$)", "ccbb", 2, 4, 1); + n("cc(bb$)", "ccbbb"); + n("(\\1)", ""); + n("\\1(a)", "aa"); + n("(a(b)\\1)\\2+", "ababb"); + n("(?:(?:\\1|z)(a))+$", "zaa"); + x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); + x2("(a)(?=\\1)", "aa", 0, 1); + n("(a)$|\\1", "az"); + x2("(a)\\1", "aa", 0, 2); + n("(a)\\1", "ab"); + x2("(a?)\\1", "aa", 0, 2); + x2("(a?\?)\\1", "aa", 0, 0); + x2("(a*)\\1", "aaaaa", 0, 4); + x3("(a*)\\1", "aaaaa", 0, 2, 1); + x2("a(b*)\\1", "abbbb", 0, 5); + x2("a(b*)\\1", "ab", 0, 1); + x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); + x2("(a*)(b*)\\2", "aaabbbb", 0, 7); + x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); + x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); + x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); + x2("([a-d])\\1", "cc", 0, 2); + x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); + n("(\\w\\d\\s)\\1", "f5 f5"); + x2("(who|[a-c]{3})\\1", "whowho", 0, 6); + x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); + x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); + x2("(^a)\\1", "aa", 0, 2); + n("(^a)\\1", "baa"); + n("(a$)\\1", "aa"); + n("(ab\\Z)\\1", "ab"); + x2("(a*\\Z)\\1", "a", 1, 1); + x2(".(a*\\Z)\\1", "ba", 1, 2); + x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); + x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); + x2("((?i:az))\\1", "AzAz", 0, 4); + n("((?i:az))\\1", "Azaz"); + x2("(?<=a)b", "ab", 1, 2); + n("(?<=a)b", "bb"); + x2("(?<=a|b)b", "bb", 1, 2); + x2("(?<=a|bc)b", "bcb", 2, 3); + x2("(?<=a|bc)b", "ab", 1, 2); + x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); + x2("(a)\\g<1>", "aa", 0, 2); + x2("(?a)", "a", 0, 1); + x2("(?ab)\\g", "abab", 0, 4); + x2("(?.zv.)\\k", "azvbazvb", 0, 8); + x2("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); + x2("(?|a\\g)+", "", 0, 0); + x2("(?|\\(\\g\\))+$", "()(())", 0, 6); + x3("\\g(?.){0}", "X", 0, 1, 1); + x2("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); + x2("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); + x2("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); + x2("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18); + x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); + x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); + x2("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); + x2("(?:(?)|(?efg))\\k", "", 0, 0); + x2("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); + n("(?:(?abc)|(?efg))\\k", "abcefg"); + x2("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); + x3("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); + x3("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); + x2("(?a|\\(\\g\\))", "a", 0, 1); + x2("(?a|\\(\\g\\))", "((((((a))))))", 0, 13); + x3("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); + x2("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); + x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); + x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); + x2("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); + x2("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); + x2("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); + x2("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); + x2("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); + x2("()*\\1", "", 0, 0); + x2("(?:()|())*\\1\\2", "", 0, 0); + x3("(?:\\1a|())*", "a", 0, 0, 1); + x2("x((.)*)*x", "0x1x2x3", 1, 6); + x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); + x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); + x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); + x2("\\xED\\xF2", "\xed\xf2", 0, 2); + x2("", "¤¢", 0, 0); + x2("¤¢", "¤¢", 0, 2); + n("¤¤", "¤¢"); + x2("¤¦¤¦", "¤¦¤¦", 0, 4); + x2("¤¢¤¤¤¦", "¤¢¤¤¤¦", 0, 6); + x2("¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³", "¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³", 0, 70); + x2("¤¢", "¤¤¤¢", 2, 4); + x2("¤¤¤¦", "¤¢¤¤¤¦", 2, 6); + x2("\\xca\\xb8", "\xca\xb8", 0, 2); + x2(".", "¤¢", 0, 2); + x2("..", "¤«¤­", 0, 4); + x2("\\w", "¤ª", 0, 2); + n("\\W", "¤¢"); + x2("[\\W]", "¤¦$", 2, 3); + x2("\\S", "¤½", 0, 2); + x2("\\S", "´Á", 0, 2); + x2("\\b", "µ¤ ", 0, 0); + x2("\\b", " ¤Û", 1, 1); + x2("\\B", "¤»¤½ ", 2, 2); + x2("\\B", "¤¦ ", 3, 3); + x2("\\B", " ¤¤", 0, 0); + x2("[¤¿¤Á]", "¤Á", 0, 2); + n("[¤Ê¤Ë]", "¤Ì"); + x2("[¤¦-¤ª]", "¤¨", 0, 2); + n("[^¤±]", "¤±"); + x2("[\\w]", "¤Í", 0, 2); + n("[\\d]", "¤Õ"); + x2("[\\D]", "¤Ï", 0, 2); + n("[\\s]", "¤¯"); + x2("[\\S]", "¤Ø", 0, 2); + x2("[\\w\\d]", "¤è", 0, 2); + x2("[\\w\\d]", " ¤è", 3, 5); + n("\\wµ´¼Ö", " µ´¼Ö"); + x2("µ´\\W¼Ö", "µ´ ¼Ö", 0, 5); + x2("¤¢.¤¤.¤¦", "¤¢¤¢¤¤¤¤¤¦", 0, 10); + x2(".\\w¤¦\\W..¤¾", "¤¨¤¦¤¦ ¤¦¤¾¤¾", 0, 13); + x2("\\s\\w¤³¤³¤³", " ¤³¤³¤³¤³", 0, 9); + x2("¤¢¤¢.¤±", "¤¢¤¢¤±¤±", 0, 8); + n(".¤¤", "¤¤¤¨"); + x2(".¤ª", "¤ª¤ª", 0, 4); + x2("^¤¢", "¤¢", 0, 2); + x2("^¤à$", "¤à", 0, 2); + x2("^\\w$", "¤Ë", 0, 2); + x2("^\\w¤«¤­¤¯¤±¤³$", "z¤«¤­¤¯¤±¤³", 0, 11); + x2("^\\w...¤¦¤¨¤ª$", "z¤¢¤¤¤¦¤¦¤¨¤ª", 0, 13); + x2("\\w\\w\\s\\W¤ª¤ª¤ª\\d", "a¤ª ¤ª¤ª¤ª4", 0, 12); + x2("\\A¤¿¤Á¤Ä", "¤¿¤Á¤Ä", 0, 6); + x2("¤à¤á¤â\\Z", "¤à¤á¤â", 0, 6); + x2("¤«¤­¤¯\\z", "¤«¤­¤¯", 0, 6); + x2("¤«¤­¤¯\\Z", "¤«¤­¤¯\n", 0, 6); + x2("\\G¤Ý¤Ô", "¤Ý¤Ô", 0, 4); + n("\\G¤¨", "¤¦¤¨¤ª"); + n("¤È¤Æ\\G", "¤È¤Æ"); + n("¤Þ¤ß\\A", "¤Þ¤ß"); + n("¤Þ\\A¤ß", "¤Þ¤ß"); + x2("(?=¤»)¤»", "¤»", 0, 2); + n("(?=¤¦).", "¤¤"); + x2("(?!¤¦)¤«", "¤«", 0, 2); + n("(?!¤È)¤¢", "¤È"); + x2("(?i:¤¢)", "¤¢", 0, 2); + x2("(?i:¤Ö¤Ù)", "¤Ö¤Ù", 0, 4); + n("(?i:¤¤)", "¤¦"); + x2("(?m:¤è.)", "¤è\n", 0, 3); + x2("(?m:.¤á)", "¤Þ\n¤á", 2, 5); + x2("¤¢?", "", 0, 0); + x2("ÊÑ?", "²½", 0, 0); + x2("ÊÑ?", "ÊÑ", 0, 2); + x2("ÎÌ*", "", 0, 0); + x2("ÎÌ*", "ÎÌ", 0, 2); + x2("»Ò*", "»Ò»Ò»Ò", 0, 6); + x2("ÇÏ*", "¼¯ÇÏÇÏÇÏÇÏ", 0, 0); + n("»³+", ""); + x2("²Ï+", "²Ï", 0, 2); + x2("»þ+", "»þ»þ»þ»þ", 0, 8); + x2("¤¨+", "¤¨¤¨¤¦¤¦¤¦", 0, 4); + x2("¤¦+", "¤ª¤¦¤¦¤¦¤¦", 2, 10); + x2(".?", "¤¿", 0, 2); + x2(".*", "¤Ñ¤Ô¤×¤Ú", 0, 8); + x2(".+", "¤í", 0, 2); + x2(".+", "¤¤¤¦¤¨¤«\n", 0, 8); + x2("¤¢|¤¤", "¤¢", 0, 2); + x2("¤¢|¤¤", "¤¤", 0, 2); + x2("¤¢¤¤|¤¤¤¦", "¤¢¤¤", 0, 4); + x2("¤¢¤¤|¤¤¤¦", "¤¤¤¦", 0, 4); + x2("¤ò(?:¤«¤­|¤­¤¯)", "¤ò¤«¤­", 0, 6); + x2("¤ò(?:¤«¤­|¤­¤¯)¤±", "¤ò¤­¤¯¤±", 0, 8); + x2("¤¢¤¤|(?:¤¢¤¦|¤¢¤ò)", "¤¢¤ò", 0, 4); + x2("¤¢|¤¤|¤¦", "¤¨¤¦", 2, 4); + x2("¤¢|¤¤|¤¦¤¨|¤ª¤«¤­|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í", "¤·¤¹¤»", 0, 6); + n("¤¢|¤¤|¤¦¤¨|¤ª¤«¤­|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í", "¤¹¤»"); + x2("¤¢|^¤ï", "¤Ö¤¢", 2, 4); + x2("¤¢|^¤ò", "¤ò¤¢", 0, 2); + x2("µ´|\\G¼Ö", "¤±¼Öµ´", 4, 6); + x2("µ´|\\G¼Ö", "¼Öµ´", 0, 2); + x2("µ´|\\A¼Ö", "b¼Öµ´", 3, 5); + x2("µ´|\\A¼Ö", "¼Ö", 0, 2); + x2("µ´|¼Ö\\Z", "¼Öµ´", 2, 4); + x2("µ´|¼Ö\\Z", "¼Ö", 0, 2); + x2("µ´|¼Ö\\Z", "¼Ö\n", 0, 2); + x2("µ´|¼Ö\\z", "¼Öµ´", 2, 4); + x2("µ´|¼Ö\\z", "¼Ö", 0, 2); + x2("\\w|\\s", "¤ª", 0, 2); + x2("\\w|%", "%¤ª", 0, 1); + x2("\\w|[&$]", "¤¦&", 0, 2); + x2("[¤¤-¤±]", "¤¦", 0, 2); + x2("[¤¤-¤±]|[^¤«-¤³]", "¤¢", 0, 2); + x2("[¤¤-¤±]|[^¤«-¤³]", "¤«", 0, 2); + x2("[^¤¢]", "\n", 0, 1); + x2("(?:¤¢|[¤¦-¤­])|¤¤¤ò", "¤¦¤ò", 0, 2); + x2("(?:¤¢|[¤¦-¤­])|¤¤¤ò", "¤¤¤ò", 0, 4); + x2("¤¢¤¤¤¦|(?=¤±¤±)..¤Û", "¤±¤±¤Û", 0, 6); + x2("¤¢¤¤¤¦|(?!¤±¤±)..¤Û", "¤¢¤¤¤Û", 0, 6); + x2("(?=¤ò¤¢)..¤¢|(?=¤ò¤ò)..¤¢", "¤ò¤ò¤¢", 0, 6); + x2("(?<=¤¢|¤¤¤¦)¤¤", "¤¤¤¦¤¤", 4, 6); + n("(?>¤¢|¤¢¤¤¤¨)¤¦", "¤¢¤¤¤¨¤¦"); + x2("(?>¤¢¤¤¤¨|¤¢)¤¦", "¤¢¤¤¤¨¤¦", 0, 8); + x2("¤¢?|¤¤", "¤¢", 0, 2); + x2("¤¢?|¤¤", "¤¤", 0, 0); + x2("¤¢?|¤¤", "", 0, 0); + x2("¤¢*|¤¤", "¤¢¤¢", 0, 4); + x2("¤¢*|¤¤*", "¤¤¤¢", 0, 0); + x2("¤¢*|¤¤*", "¤¢¤¤", 0, 2); + x2("[a¤¢]*|¤¤*", "a¤¢¤¤¤¤¤¤", 0, 3); + x2("¤¢+|¤¤*", "", 0, 0); + x2("¤¢+|¤¤*", "¤¤¤¤¤¤", 0, 6); + x2("¤¢+|¤¤*", "¤¢¤¤¤¤¤¤", 0, 2); + x2("¤¢+|¤¤*", "a¤¢¤¤¤¤¤¤", 0, 0); + n("¤¢+|¤¤+", ""); + x2("(¤¢|¤¤)?", "¤¤", 0, 2); + x2("(¤¢|¤¤)*", "¤¤¤¢", 0, 4); + x2("(¤¢|¤¤)+", "¤¤¤¢¤¤", 0, 6); + x2("(¤¢¤¤|¤¦¤¢)+", "¤¦¤¢¤¢¤¤¤¦¤¨", 0, 8); + x2("(¤¢¤¤|¤¦¤¨)+", "¤¦¤¢¤¢¤¤¤¦¤¨", 4, 12); + x2("(¤¢¤¤|¤¦¤¢)+", "¤¢¤¢¤¤¤¦¤¢", 2, 10); + x2("(¤¢¤¤|¤¦¤¢)+", "¤¢¤¤¤ò¤¦¤¢", 0, 4); + x2("(¤¢¤¤|¤¦¤¢)+", "$$zzzz¤¢¤¤¤ò¤¦¤¢", 6, 10); + x2("(¤¢|¤¤¤¢¤¤)+", "¤¢¤¤¤¢¤¤¤¢", 0, 10); + x2("(¤¢|¤¤¤¢¤¤)+", "¤¤¤¢", 2, 4); + x2("(¤¢|¤¤¤¢¤¤)+", "¤¤¤¢¤¢¤¢¤¤¤¢", 2, 8); + x2("(?:¤¢|¤¤)(?:¤¢|¤¤)", "¤¢¤¤", 0, 4); + x2("(?:¤¢*|¤¤*)(?:¤¢*|¤¤*)", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 6); + x2("(?:¤¢*|¤¤*)(?:¤¢+|¤¤+)", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12); + x2("(?:¤¢+|¤¤+){2}", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12); + x2("(?:¤¢+|¤¤+){1,2}", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12); + x2("(?:¤¢+|\\A¤¤*)¤¦¤¦", "¤¦¤¦", 0, 4); + n("(?:¤¢+|\\A¤¤*)¤¦¤¦", "¤¢¤¤¤¦¤¦"); + x2("(?:^¤¢+|¤¤+)*¤¦", "¤¢¤¢¤¤¤¤¤¤¤¢¤¤¤¦", 12, 16); + x2("(?:^¤¢+|¤¤+)*¤¦", "¤¢¤¢¤¤¤¤¤¤¤¤¤¦", 0, 14); + x2("¤¦{0,}", "¤¦¤¦¤¦¤¦", 0, 8); + x2("¤¢|(?i)c", "C", 0, 1); + x2("(?i)c|¤¢", "C", 0, 1); + x2("(?i:¤¢)|a", "a", 0, 1); + n("(?i:¤¢)|a", "A"); + x2("[¤¢¤¤¤¦]?", "¤¢¤¤¤¦", 0, 2); + x2("[¤¢¤¤¤¦]*", "¤¢¤¤¤¦", 0, 6); + x2("[^¤¢¤¤¤¦]*", "¤¢¤¤¤¦", 0, 0); + n("[^¤¢¤¤¤¦]+", "¤¢¤¤¤¦"); + x2("¤¢?\?", "¤¢¤¢¤¢", 0, 0); + x2("¤¤¤¢?\?¤¤", "¤¤¤¢¤¤", 0, 6); + x2("¤¢*?", "¤¢¤¢¤¢", 0, 0); + x2("¤¤¤¢*?", "¤¤¤¢¤¢", 0, 2); + x2("¤¤¤¢*?¤¤", "¤¤¤¢¤¢¤¤", 0, 8); + x2("¤¢+?", "¤¢¤¢¤¢", 0, 2); + x2("¤¤¤¢+?", "¤¤¤¢¤¢", 0, 4); + x2("¤¤¤¢+?¤¤", "¤¤¤¢¤¢¤¤", 0, 8); + x2("(?:Å·?)?\?", "Å·", 0, 0); + x2("(?:Å·?\?)?", "Å·", 0, 0); + x2("(?:Ì´?)+?", "Ì´Ì´Ì´", 0, 2); + x2("(?:É÷+)?\?", "É÷É÷É÷", 0, 0); + x2("(?:Àã+)?\?Áú", "ÀãÀãÀãÁú", 0, 8); + x2("(?:¤¢¤¤)?{2}", "", 0, 0); + x2("(?:µ´¼Ö)?{2}", "µ´¼Öµ´¼Öµ´", 0, 8); + x2("(?:µ´¼Ö)*{0}", "µ´¼Öµ´¼Öµ´", 0, 0); + x2("(?:µ´¼Ö){3,}", "µ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 16); + n("(?:µ´¼Ö){3,}", "µ´¼Öµ´¼Ö"); + x2("(?:µ´¼Ö){2,4}", "µ´¼Öµ´¼Öµ´¼Ö", 0, 12); + x2("(?:µ´¼Ö){2,4}", "µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 16); + x2("(?:µ´¼Ö){2,4}?", "µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 8); + x2("(?:µ´¼Ö){,}", "µ´¼Ö{,}", 0, 7); + x2("(?:¤«¤­¤¯)+?{2}", "¤«¤­¤¯¤«¤­¤¯¤«¤­¤¯", 0, 12); + x3("(²Ð)", "²Ð", 0, 2, 1); + x3("(²Ð¿å)", "²Ð¿å", 0, 4, 1); + x2("((»þ´Ö))", "»þ´Ö", 0, 4); + x3("((É÷¿å))", "É÷¿å", 0, 4, 1); + x3("((ºòÆü))", "ºòÆü", 0, 4, 2); + x3("((((((((((((((((((((ÎÌ»Ò))))))))))))))))))))", "ÎÌ»Ò", 0, 4, 20); + x3("(¤¢¤¤)(¤¦¤¨)", "¤¢¤¤¤¦¤¨", 0, 4, 1); + x3("(¤¢¤¤)(¤¦¤¨)", "¤¢¤¤¤¦¤¨", 4, 8, 2); + x3("()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤­¤¯¤±¤³", "¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³", 6, 12, 3); + x3("(()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤­¤¯¤±¤³)", "¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³", 6, 12, 4); + x3(".*(¥Õ¥©)¥ó¡¦¥Þ(¥ó()¥·¥å¥¿)¥¤¥ó", "¥Õ¥©¥ó¡¦¥Þ¥ó¥·¥å¥¿¥¤¥ó", 10, 18, 2); + x2("(^¤¢)", "¤¢", 0, 2); + x3("(¤¢)|(¤¢)", "¤¤¤¢", 2, 4, 1); + x3("(^¤¢)|(¤¢)", "¤¤¤¢", 2, 4, 2); + x3("(¤¢?)", "¤¢¤¢¤¢", 0, 2, 1); + x3("(¤Þ*)", "¤Þ¤Þ¤Þ", 0, 6, 1); + x3("(¤È*)", "", 0, 0, 1); + x3("(¤ë+)", "¤ë¤ë¤ë¤ë¤ë¤ë¤ë", 0, 14, 1); + x3("(¤Õ+|¤Ø*)", "¤Õ¤Õ¤Õ¤Ø¤Ø", 0, 6, 1); + x3("(¤¢+|¤¤?)", "¤¤¤¤¤¤¤¢¤¢", 0, 2, 1); + x3("(¤¢¤¤¤¦)?", "¤¢¤¤¤¦", 0, 6, 1); + x3("(¤¢¤¤¤¦)*", "¤¢¤¤¤¦", 0, 6, 1); + x3("(¤¢¤¤¤¦)+", "¤¢¤¤¤¦", 0, 6, 1); + x3("(¤µ¤·¤¹|¤¢¤¤¤¦)+", "¤¢¤¤¤¦", 0, 6, 1); + x3("([¤Ê¤Ë¤Ì][¤«¤­¤¯]|¤«¤­¤¯)+", "¤«¤­¤¯", 0, 6, 1); + x3("((?i:¤¢¤¤¤¦))", "¤¢¤¤¤¦", 0, 6, 1); + x3("((?m:¤¢.¤¦))", "¤¢\n¤¦", 0, 5, 1); + x3("((?=¤¢¤ó)¤¢)", "¤¢¤ó¤¤", 0, 2, 1); + x3("¤¢¤¤¤¦|(.¤¢¤¤¤¨)", "¤ó¤¢¤¤¤¨", 0, 8, 1); + x3("¤¢*(.)", "¤¢¤¢¤¢¤¢¤ó", 8, 10, 1); + x3("¤¢*?(.)", "¤¢¤¢¤¢¤¢¤ó", 0, 2, 1); + x3("¤¢*?(¤ó)", "¤¢¤¢¤¢¤¢¤ó", 8, 10, 1); + x3("[¤¤¤¦¤¨]¤¢*(.)", "¤¨¤¢¤¢¤¢¤¢¤ó", 10, 12, 1); + x3("(\\A¤¤¤¤)¤¦¤¦", "¤¤¤¤¤¦¤¦", 0, 4, 1); + n("(\\A¤¤¤¤)¤¦¤¦", "¤ó¤¤¤¤¤¦¤¦"); + x3("(^¤¤¤¤)¤¦¤¦", "¤¤¤¤¤¦¤¦", 0, 4, 1); + n("(^¤¤¤¤)¤¦¤¦", "¤ó¤¤¤¤¤¦¤¦"); + x3("¤í¤í(¤ë¤ë$)", "¤í¤í¤ë¤ë", 4, 8, 1); + n("¤í¤í(¤ë¤ë$)", "¤í¤í¤ë¤ë¤ë"); + x2("(̵)\\1", "̵̵", 0, 4); + n("(̵)\\1", "̵Éð"); + x2("(¶õ?)\\1", "¶õ¶õ", 0, 4); + x2("(¶õ?\?)\\1", "¶õ¶õ", 0, 0); + x2("(¶õ*)\\1", "¶õ¶õ¶õ¶õ¶õ", 0, 8); + x3("(¶õ*)\\1", "¶õ¶õ¶õ¶õ¶õ", 0, 4, 1); + x2("¤¢(¤¤*)\\1", "¤¢¤¤¤¤¤¤¤¤", 0, 10); + x2("¤¢(¤¤*)\\1", "¤¢¤¤", 0, 2); + x2("(¤¢*)(¤¤*)\\1\\2", "¤¢¤¢¤¢¤¤¤¤¤¢¤¢¤¢¤¤¤¤", 0, 20); + x2("(¤¢*)(¤¤*)\\2", "¤¢¤¢¤¢¤¤¤¤¤¤¤¤", 0, 14); + x3("(¤¢*)(¤¤*)\\2", "¤¢¤¢¤¢¤¤¤¤¤¤¤¤", 6, 10, 2); + x2("(((((((¤Ý*)¤Ú))))))¤Ô\\7", "¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý", 0, 16); + x3("(((((((¤Ý*)¤Ú))))))¤Ô\\7", "¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý", 0, 6, 7); + x2("(¤Ï)(¤Ò)(¤Õ)\\2\\1\\3", "¤Ï¤Ò¤Õ¤Ò¤Ï¤Õ", 0, 12); + x2("([¤­-¤±])\\1", "¤¯¤¯", 0, 4); + x2("(\\w\\d\\s)\\1", "¤¢5 ¤¢5 ", 0, 8); + n("(\\w\\d\\s)\\1", "¤¢5 ¤¢5"); + x2("(ï¡©|[¤¢-¤¦]{3})\\1", "ï¡©", 0, 8); + x2("...(ï¡©|[¤¢-¤¦]{3})\\1", "¤¢a¤¢Ã¯¡©Ã¯¡©", 0, 13); + x2("(ï¡©|[¤¢-¤¦]{3})\\1", "¤¦¤¤¤¦¤¦¤¤¤¦", 0, 12); + x2("(^¤³)\\1", "¤³¤³", 0, 4); + n("(^¤à)\\1", "¤á¤à¤à"); + n("(¤¢$)\\1", "¤¢¤¢"); + n("(¤¢¤¤\\Z)\\1", "¤¢¤¤"); + x2("(¤¢*\\Z)\\1", "¤¢", 2, 2); + x2(".(¤¢*\\Z)\\1", "¤¤¤¢", 2, 4); + x3("(.(¤ä¤¤¤æ)\\2)", "z¤ä¤¤¤æ¤ä¤¤¤æ", 0, 13, 1); + x3("(.(..\\d.)\\2)", "¤¢12341234", 0, 10, 1); + x2("((?i:¤¢v¤º))\\1", "¤¢v¤º¤¢v¤º", 0, 10); + x2("(?<¶ò¤«>ÊÑ|\\(\\g<¶ò¤«>\\))", "((((((ÊÑ))))))", 0, 14); + x2("\\A(?:\\g<°¤_1>|\\g<±¾_2>|\\z½ªÎ» (?<°¤_1>´Ñ|¼«\\g<±¾_2>¼«)(?<±¾_2>ºß|Êî»§\\g<°¤_1>Êî»§))$", "Êî»§¼«Êî»§¼«ºß¼«Êî»§¼«Êî»§", 0, 26); + x2("[[¤Ò¤Õ]]", "¤Õ", 0, 2); + x2("[[¤¤¤ª¤¦]¤«]", "¤«", 0, 2); + n("[[^¤¢]]", "¤¢"); + n("[^[¤¢]]", "¤¢"); + x2("[^[^¤¢]]", "¤¢", 0, 2); + x2("[[¤«¤­¤¯]&&¤­¤¯]", "¤¯", 0, 2); + n("[[¤«¤­¤¯]&&¤­¤¯]", "¤«"); + n("[[¤«¤­¤¯]&&¤­¤¯]", "¤±"); + x2("[¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]", "¤ñ", 0, 2); + n("[^¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]", "¤ñ"); + x2("[[^¤¢&&¤¢]&&¤¢-¤ó]", "¤¤", 0, 2); + n("[[^¤¢&&¤¢]&&¤¢-¤ó]", "¤¢"); + x2("[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]", "¤­", 0, 2); + n("[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]", "¤¤"); + x2("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤¦", 0, 2); + x2("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤¨", 0, 2); + n("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤«"); + x2("[¤¢-&&-¤¢]", "-", 0, 1); + x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]q-w]", "¤¨", 0, 2); + x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "f", 0, 1); + x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "g", 0, 1); + n("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "2"); + x2("a¥Ð¡¼¥¸¥ç¥ó¤Î¥À¥¦¥ó¥í¡¼¥É<\\/b>", "a¥Ð¡¼¥¸¥ç¥ó¤Î¥À¥¦¥ó¥í¡¼¥É", 0, 32); + x2(".¥Ð¡¼¥¸¥ç¥ó¤Î¥À¥¦¥ó¥í¡¼¥É<\\/b>", "a¥Ð¡¼¥¸¥ç¥ó¤Î¥À¥¦¥ó¥í¡¼¥É", 0, 32); + fprintf(stdout, + "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + +#ifndef POSIX_TEST + onig_region_free(region, 1); + onig_end(); +#endif + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +} diff --git a/ext/mbstring/oniguruma/testu.c b/ext/mbstring/oniguruma/testu.c new file mode 100644 index 0000000000..5652988ca9 --- /dev/null +++ b/ext/mbstring/oniguruma/testu.c @@ -0,0 +1,911 @@ +/* + * This program was generated by testconv.rb. + */ +#include + +#ifdef POSIX_TEST +#include "onigposix.h" +#else +#include "oniguruma.h" +#endif + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +#ifndef POSIX_TEST +static OnigRegion* region; +static OnigEncoding ENC; +#endif + +#define ulen(p) onigenc_str_bytelen_null(ENC, (UChar* )p) + +static void uconv(char* from, char* to, int len) +{ + int i; + unsigned char c; + char *q; + + q = to; + + for (i = 0; i < len; i += 2) { + c = (unsigned char )from[i]; + if (c == 0) { + c = (unsigned char )from[i+1]; + if (c < 0x20 || c >= 0x7f || c == 0x5c || c == 0x22) { + sprintf(q, "\\%03o", c); + q += 4; + } + else { + sprintf(q, "%c", c); + q++; + } + } + else { + sprintf(q, "\\%03o", c); + q += 4; + c = (unsigned char )from[i+1]; + sprintf(q, "\\%03o", c); + q += 4; + } + } + + *q = 0; +} + +static void xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + char cpat[4000], cstr[4000]; + +#ifdef POSIX_TEST + regex_t reg; + char buf[200]; + regmatch_t pmatch[20]; + + uconv(pattern, cpat, ulen(pattern)); + uconv(str, cstr, ulen(str)); + + r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", cpat, cstr); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", cpat, cstr); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", cpat, cstr, + from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); + nfail++; + } + } + } + regfree(®); + +#else + regex_t* reg; + OnigCompileInfo ci; + OnigErrorInfo einfo; + + uconv(pattern, cpat, ulen(pattern)); + uconv(str, cstr, ulen(str)); + +#if 0 + r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + ulen(pattern)), + ONIG_OPTION_DEFAULT, ENC, ONIG_SYNTAX_DEFAULT, &einfo); +#else + ci.num_of_elements = 5; + ci.pattern_enc = ENC; + ci.target_enc = ENC; + ci.syntax = ONIG_SYNTAX_DEFAULT; + ci.option = ONIG_OPTION_DEFAULT; + ci.case_fold_flag = ONIGENC_CASE_FOLD_DEFAULT; + + r = onig_new_deluxe(®, (UChar* )pattern, + (UChar* )(pattern + ulen(pattern)), + &ci, &einfo); +#endif + + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + ulen(str)), + (UChar* )str, (UChar* )(str + ulen(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", cpat, cstr); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", cpat, cstr); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", cpat, cstr, + from, to, region->beg[mem], region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +#endif +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ + err_file = stdout; + +#ifndef POSIX_TEST + region = onig_region_new(); +#endif +#ifdef POSIX_TEST + reg_set_encoding(REG_POSIX_ENCODING_UTF16_BE); +#else + ENC = ONIG_ENCODING_UTF16_BE; +#endif + x2("\000\000", "\000\000", 0, 0); + x2("\000^\000\000", "\000\000", 0, 0); + x2("\000$\000\000", "\000\000", 0, 0); + x2("\000\134\000G\000\000", "\000\000", 0, 0); + x2("\000\134\000A\000\000", "\000\000", 0, 0); + x2("\000\134\000Z\000\000", "\000\000", 0, 0); + x2("\000\134\000z\000\000", "\000\000", 0, 0); + x2("\000^\000$\000\000", "\000\000", 0, 0); + x2("\000\134\000c\000a\000\000", "\000\001\000\000", 0, 2); + x2("\000\134\000C\000-\000b\000\000", "\000\002\000\000", 0, 2); + x2("\000\134\000c\000\134\000\134\000\000", "\000\034\000\000", 0, 2); + x2("\000q\000[\000\134\000c\000\134\000\134\000]\000\000", "\000q\000\034\000\000", 0, 4); + x2("\000\000", "\000a\000\000", 0, 0); + x2("\000a\000\000", "\000a\000\000", 0, 2); + x2("\000\134\000x\0000\0000\000\134\000x\0006\0001\000\000", "\000a\000\000", 0, 2); + x2("\000a\000a\000\000", "\000a\000a\000\000", 0, 4); + x2("\000a\000a\000a\000\000", "\000a\000a\000a\000\000", 0, 6); + x2("\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", "\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 70); + x2("\000a\000b\000\000", "\000a\000b\000\000", 0, 4); + x2("\000b\000\000", "\000a\000b\000\000", 2, 4); + x2("\000b\000c\000\000", "\000a\000b\000c\000\000", 2, 6); + x2("\000(\000?\000i\000:\000#\000R\000E\000T\000#\000)\000\000", "\000#\000I\000N\000S\000#\000#\000R\000E\000T\000#\000\000", 10, 20); + x2("\000\134\0000\0000\0000\000\134\0001\0007\000\000", "\000\017\000\000", 0, 2); + x2("\000\134\000x\0000\0000\000\134\000x\0001\000f\000\000", "\000\037\000\000", 0, 2); + x2("\000a\000(\000?\000#\000.\000.\000.\000.\000\134\000\134\000J\000J\000J\000J\000)\000b\000\000", "\000a\000b\000\000", 0, 4); + x2("\000(\000?\000x\000)\000 \000 \000G\000 \000(\000o\000 \000O\000(\000?\000-\000x\000)\000o\000O\000)\000 \000g\000 \000L\000\000", "\000G\000o\000O\000o\000O\000g\000L\000e\000\000", 0, 14); + x2("\000.\000\000", "\000a\000\000", 0, 2); + n("\000.\000\000", "\000\000"); + x2("\000.\000.\000\000", "\000a\000b\000\000", 0, 4); + x2("\000\134\000w\000\000", "\000e\000\000", 0, 2); + n("\000\134\000W\000\000", "\000e\000\000"); + x2("\000\134\000s\000\000", "\000 \000\000", 0, 2); + x2("\000\134\000S\000\000", "\000b\000\000", 0, 2); + x2("\000\134\000d\000\000", "\0004\000\000", 0, 2); + n("\000\134\000D\000\000", "\0004\000\000"); + x2("\000\134\000b\000\000", "\000z\000 \000\000", 0, 0); + x2("\000\134\000b\000\000", "\000 \000z\000\000", 2, 2); + x2("\000\134\000B\000\000", "\000z\000z\000 \000\000", 2, 2); + x2("\000\134\000B\000\000", "\000z\000 \000\000", 4, 4); + x2("\000\134\000B\000\000", "\000 \000z\000\000", 0, 0); + x2("\000[\000a\000b\000]\000\000", "\000b\000\000", 0, 2); + n("\000[\000a\000b\000]\000\000", "\000c\000\000"); + x2("\000[\000a\000-\000z\000]\000\000", "\000t\000\000", 0, 2); + n("\000[\000^\000a\000]\000\000", "\000a\000\000"); + x2("\000[\000^\000a\000]\000\000", "\000\012\000\000", 0, 2); + x2("\000[\000]\000]\000\000", "\000]\000\000", 0, 2); + n("\000[\000^\000]\000]\000\000", "\000]\000\000"); + x2("\000[\000\134\000^\000]\000+\000\000", "\0000\000^\000^\0001\000\000", 2, 6); + x2("\000[\000b\000-\000]\000\000", "\000b\000\000", 0, 2); + x2("\000[\000b\000-\000]\000\000", "\000-\000\000", 0, 2); + x2("\000[\000\134\000w\000]\000\000", "\000z\000\000", 0, 2); + n("\000[\000\134\000w\000]\000\000", "\000 \000\000"); + x2("\000[\000\134\000W\000]\000\000", "\000b\000$\000\000", 2, 4); + x2("\000[\000\134\000d\000]\000\000", "\0005\000\000", 0, 2); + n("\000[\000\134\000d\000]\000\000", "\000e\000\000"); + x2("\000[\000\134\000D\000]\000\000", "\000t\000\000", 0, 2); + n("\000[\000\134\000D\000]\000\000", "\0003\000\000"); + x2("\000[\000\134\000s\000]\000\000", "\000 \000\000", 0, 2); + n("\000[\000\134\000s\000]\000\000", "\000a\000\000"); + x2("\000[\000\134\000S\000]\000\000", "\000b\000\000", 0, 2); + n("\000[\000\134\000S\000]\000\000", "\000 \000\000"); + x2("\000[\000\134\000w\000\134\000d\000]\000\000", "\0002\000\000", 0, 2); + n("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000\000"); + x2("\000[\000[\000:\000u\000p\000p\000e\000r\000:\000]\000]\000\000", "\000B\000\000", 0, 2); + x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000+\000\000", 0, 2); + x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000G\000H\000I\000K\000K\000-\0009\000+\000*\000\000", 12, 14); + x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000-\000@\000^\000+\000\000", 6, 8); + n("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000A\000\000"); + x2("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000:\000\000", 0, 2); + x2("\000[\000\134\0000\0000\0000\000\134\0000\0004\0004\000-\000\134\0000\0000\0000\000\134\0000\0004\0007\000]\000\000", "\000&\000\000", 0, 2); + x2("\000[\000\134\000x\0000\0000\000\134\000x\0005\000a\000-\000\134\000x\0000\0000\000\134\000x\0005\000c\000]\000\000", "\000[\000\000", 0, 2); + x2("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000l\000\000", 0, 2); + n("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000n\000\000"); + n("\000^\000[\0000\000-\0009\000A\000-\000F\000]\000+\000 \0000\000+\000 \000U\000N\000D\000E\000F\000 \000\000", "\0007\0005\000F\000 \0000\0000\0000\0000\0000\0000\0000\0000\000 \000S\000E\000C\000T\0001\0004\000A\000 \000n\000o\000t\000y\000p\000e\000 \000(\000)\000 \000 \000 \000 \000E\000x\000t\000e\000r\000n\000a\000l\000 \000 \000 \000 \000|\000 \000_\000r\000b\000_\000a\000p\000p\000l\000y\000\000"); + x2("\000[\000\134\000[\000]\000\000", "\000[\000\000", 0, 2); + x2("\000[\000\134\000]\000]\000\000", "\000]\000\000", 0, 2); + x2("\000[\000&\000]\000\000", "\000&\000\000", 0, 2); + x2("\000[\000[\000a\000b\000]\000]\000\000", "\000b\000\000", 0, 2); + x2("\000[\000[\000a\000b\000]\000c\000]\000\000", "\000c\000\000", 0, 2); + n("\000[\000[\000^\000a\000]\000]\000\000", "\000a\000\000"); + n("\000[\000^\000[\000a\000]\000]\000\000", "\000a\000\000"); + x2("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000b\000\000", 0, 2); + n("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000a\000\000"); + n("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000c\000\000"); + x2("\000[\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000", 0, 2); + n("\000[\000^\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000"); + x2("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000b\000\000", 0, 2); + n("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000a\000\000"); + x2("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000h\000\000", 0, 2); + n("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000c\000\000"); + x2("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000c\000\000", 0, 2); + x2("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000e\000\000", 0, 2); + n("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000f\000\000"); + x2("\000[\000a\000-\000&\000&\000-\000a\000]\000\000", "\000-\000\000", 0, 2); + n("\000[\000a\000\134\000-\000&\000&\000\134\000-\000a\000]\000\000", "\000&\000\000"); + n("\000\134\000w\000a\000b\000c\000\000", "\000 \000a\000b\000c\000\000"); + x2("\000a\000\134\000W\000b\000c\000\000", "\000a\000 \000b\000c\000\000", 0, 8); + x2("\000a\000.\000b\000.\000c\000\000", "\000a\000a\000b\000b\000c\000\000", 0, 10); + x2("\000.\000\134\000w\000b\000\134\000W\000.\000.\000c\000\000", "\000a\000b\000b\000 \000b\000c\000c\000\000", 0, 14); + x2("\000\134\000s\000\134\000w\000z\000z\000z\000\000", "\000 \000z\000z\000z\000z\000\000", 0, 10); + x2("\000a\000a\000.\000b\000\000", "\000a\000a\000b\000b\000\000", 0, 8); + n("\000.\000a\000\000", "\000a\000b\000\000"); + x2("\000.\000a\000\000", "\000a\000a\000\000", 0, 4); + x2("\000^\000a\000\000", "\000a\000\000", 0, 2); + x2("\000^\000a\000$\000\000", "\000a\000\000", 0, 2); + x2("\000^\000\134\000w\000$\000\000", "\000a\000\000", 0, 2); + n("\000^\000\134\000w\000$\000\000", "\000 \000\000"); + x2("\000^\000\134\000w\000a\000b\000$\000\000", "\000z\000a\000b\000\000", 0, 6); + x2("\000^\000\134\000w\000a\000b\000c\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14); + x2("\000^\000\134\000w\000.\000.\000.\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14); + x2("\000\134\000w\000\134\000w\000\134\000s\000\134\000W\000a\000a\000a\000\134\000d\000\000", "\000a\000a\000 \000 \000a\000a\000a\0004\000\000", 0, 16); + x2("\000\134\000A\000\134\000Z\000\000", "\000\000", 0, 0); + x2("\000\134\000A\000x\000y\000z\000\000", "\000x\000y\000z\000\000", 0, 6); + x2("\000x\000y\000z\000\134\000Z\000\000", "\000x\000y\000z\000\000", 0, 6); + x2("\000x\000y\000z\000\134\000z\000\000", "\000x\000y\000z\000\000", 0, 6); + x2("\000a\000\134\000Z\000\000", "\000a\000\000", 0, 2); + x2("\000\134\000G\000a\000z\000\000", "\000a\000z\000\000", 0, 4); + n("\000\134\000G\000z\000\000", "\000b\000z\000a\000\000"); + n("\000a\000z\000\134\000G\000\000", "\000a\000z\000\000"); + n("\000a\000z\000\134\000A\000\000", "\000a\000z\000\000"); + n("\000a\000\134\000A\000z\000\000", "\000a\000z\000\000"); + x2("\000\134\000^\000\134\000$\000\000", "\000^\000$\000\000", 0, 4); + x2("\000^\000x\000?\000y\000\000", "\000x\000y\000\000", 0, 4); + x2("\000^\000(\000x\000?\000y\000)\000\000", "\000x\000y\000\000", 0, 4); + x2("\000\134\000w\000\000", "\000_\000\000", 0, 2); + n("\000\134\000W\000\000", "\000_\000\000"); + x2("\000(\000?\000=\000z\000)\000z\000\000", "\000z\000\000", 0, 2); + n("\000(\000?\000=\000z\000)\000.\000\000", "\000a\000\000"); + x2("\000(\000?\000!\000z\000)\000a\000\000", "\000a\000\000", 0, 2); + n("\000(\000?\000!\000z\000)\000a\000\000", "\000z\000\000"); + x2("\000(\000?\000i\000:\000a\000)\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000i\000:\000a\000)\000\000", "\000A\000\000", 0, 2); + x2("\000(\000?\000i\000:\000A\000)\000\000", "\000a\000\000", 0, 2); + n("\000(\000?\000i\000:\000A\000)\000\000", "\000b\000\000"); + x2("\000(\000?\000i\000:\000[\000A\000-\000Z\000]\000)\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000H\000\000", 0, 2); + x2("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000h\000\000", 0, 2); + n("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000e\000\000"); + x2("\000(\000?\000i\000:\000[\000A\000-\000c\000]\000)\000\000", "\000D\000\000", 0, 2); + n("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000A\000\000"); + n("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000a\000\000"); + x2("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\000Z\000\000", 0, 2); + x2("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\0007\000\000", 0, 2); + x2("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000b\000\000", 0, 2); + x2("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000{\000\000", 0, 2); + x2("\000(\000?\000i\000:\000\134\000?\000a\000)\000\000", "\000?\000A\000\000", 0, 4); + x2("\000(\000?\000i\000:\000\134\000*\000A\000)\000\000", "\000*\000a\000\000", 0, 4); + n("\000.\000\000", "\000\012\000\000"); + x2("\000(\000?\000m\000:\000.\000)\000\000", "\000\012\000\000", 0, 2); + x2("\000(\000?\000m\000:\000a\000.\000)\000\000", "\000a\000\012\000\000", 0, 4); + x2("\000(\000?\000m\000:\000.\000b\000)\000\000", "\000a\000\012\000b\000\000", 2, 6); + x2("\000.\000*\000a\000b\000c\000\000", "\000d\000d\000d\000a\000b\000d\000d\000\012\000d\000d\000a\000b\000c\000\000", 16, 26); + x2("\000(\000?\000m\000:\000.\000*\000a\000b\000c\000)\000\000", "\000d\000d\000d\000a\000b\000d\000d\000a\000b\000c\000\000", 0, 20); + n("\000(\000?\000i\000)\000(\000?\000-\000i\000)\000a\000\000", "\000A\000\000"); + n("\000(\000?\000i\000)\000(\000?\000-\000i\000:\000a\000)\000\000", "\000A\000\000"); + x2("\000a\000?\000\000", "\000\000", 0, 0); + x2("\000a\000?\000\000", "\000b\000\000", 0, 0); + x2("\000a\000?\000\000", "\000a\000\000", 0, 2); + x2("\000a\000*\000\000", "\000\000", 0, 0); + x2("\000a\000*\000\000", "\000a\000\000", 0, 2); + x2("\000a\000*\000\000", "\000a\000a\000a\000\000", 0, 6); + x2("\000a\000*\000\000", "\000b\000a\000a\000a\000a\000\000", 0, 0); + n("\000a\000+\000\000", "\000\000"); + x2("\000a\000+\000\000", "\000a\000\000", 0, 2); + x2("\000a\000+\000\000", "\000a\000a\000a\000a\000\000", 0, 8); + x2("\000a\000+\000\000", "\000a\000a\000b\000b\000b\000\000", 0, 4); + x2("\000a\000+\000\000", "\000b\000a\000a\000a\000a\000\000", 2, 10); + x2("\000.\000?\000\000", "\000\000", 0, 0); + x2("\000.\000?\000\000", "\000f\000\000", 0, 2); + x2("\000.\000?\000\000", "\000\012\000\000", 0, 0); + x2("\000.\000*\000\000", "\000\000", 0, 0); + x2("\000.\000*\000\000", "\000a\000b\000c\000d\000e\000\000", 0, 10); + x2("\000.\000+\000\000", "\000z\000\000", 0, 2); + x2("\000.\000+\000\000", "\000z\000d\000s\000w\000e\000r\000\012\000\000", 0, 12); + x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000b\000f\000b\000a\000c\000\000", 0, 8); + x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14); + x2("\000(\000(\000.\000*\000)\000a\000\134\0002\000f\000)\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14); + x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000z\000z\000z\000z\000z\000z\000\012\000b\000a\000z\000z\000\012\000z\000z\000z\000z\000b\000a\000b\000f\000\000", 38, 46); + x2("\000a\000|\000b\000\000", "\000a\000\000", 0, 2); + x2("\000a\000|\000b\000\000", "\000b\000\000", 0, 2); + x2("\000|\000a\000\000", "\000a\000\000", 0, 0); + x2("\000(\000|\000a\000)\000\000", "\000a\000\000", 0, 0); + x2("\000a\000b\000|\000b\000c\000\000", "\000a\000b\000\000", 0, 4); + x2("\000a\000b\000|\000b\000c\000\000", "\000b\000c\000\000", 0, 4); + x2("\000z\000(\000?\000:\000a\000b\000|\000b\000c\000)\000\000", "\000z\000b\000c\000\000", 0, 6); + x2("\000a\000(\000?\000:\000a\000b\000|\000b\000c\000)\000c\000\000", "\000a\000a\000b\000c\000\000", 0, 8); + x2("\000a\000b\000|\000(\000?\000:\000a\000c\000|\000a\000z\000)\000\000", "\000a\000z\000\000", 0, 4); + x2("\000a\000|\000b\000|\000c\000\000", "\000d\000c\000\000", 2, 4); + x2("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000p\000q\000r\000\000", 0, 4); + n("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000m\000n\000\000"); + x2("\000a\000|\000^\000z\000\000", "\000b\000a\000\000", 2, 4); + x2("\000a\000|\000^\000z\000\000", "\000z\000a\000\000", 0, 2); + x2("\000a\000|\000\134\000G\000z\000\000", "\000b\000z\000a\000\000", 4, 6); + x2("\000a\000|\000\134\000G\000z\000\000", "\000z\000a\000\000", 0, 2); + x2("\000a\000|\000\134\000A\000z\000\000", "\000b\000z\000a\000\000", 4, 6); + x2("\000a\000|\000\134\000A\000z\000\000", "\000z\000a\000\000", 0, 2); + x2("\000a\000|\000b\000\134\000Z\000\000", "\000b\000a\000\000", 2, 4); + x2("\000a\000|\000b\000\134\000Z\000\000", "\000b\000\000", 0, 2); + x2("\000a\000|\000b\000\134\000z\000\000", "\000b\000a\000\000", 2, 4); + x2("\000a\000|\000b\000\134\000z\000\000", "\000b\000\000", 0, 2); + x2("\000\134\000w\000|\000\134\000s\000\000", "\000 \000\000", 0, 2); + n("\000\134\000w\000|\000\134\000w\000\000", "\000 \000\000"); + x2("\000\134\000w\000|\000%\000\000", "\000%\000\000", 0, 2); + x2("\000\134\000w\000|\000[\000&\000$\000]\000\000", "\000&\000\000", 0, 2); + x2("\000[\000b\000-\000d\000]\000|\000[\000^\000e\000-\000z\000]\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000d\000z\000\000", 0, 2); + x2("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000b\000z\000\000", 0, 4); + x2("\000a\000b\000c\000|\000(\000?\000=\000z\000z\000)\000.\000.\000f\000\000", "\000z\000z\000f\000\000", 0, 6); + x2("\000a\000b\000c\000|\000(\000?\000!\000z\000z\000)\000.\000.\000f\000\000", "\000a\000b\000f\000\000", 0, 6); + x2("\000(\000?\000=\000z\000a\000)\000.\000.\000a\000|\000(\000?\000=\000z\000z\000)\000.\000.\000a\000\000", "\000z\000z\000a\000\000", 0, 6); + n("\000(\000?\000>\000a\000|\000a\000b\000d\000)\000c\000\000", "\000a\000b\000d\000c\000\000"); + x2("\000(\000?\000>\000a\000b\000d\000|\000a\000)\000c\000\000", "\000a\000b\000d\000c\000\000", 0, 8); + x2("\000a\000?\000|\000b\000\000", "\000a\000\000", 0, 2); + x2("\000a\000?\000|\000b\000\000", "\000b\000\000", 0, 0); + x2("\000a\000?\000|\000b\000\000", "\000\000", 0, 0); + x2("\000a\000*\000|\000b\000\000", "\000a\000a\000\000", 0, 4); + x2("\000a\000*\000|\000b\000*\000\000", "\000b\000a\000\000", 0, 0); + x2("\000a\000*\000|\000b\000*\000\000", "\000a\000b\000\000", 0, 2); + x2("\000a\000+\000|\000b\000*\000\000", "\000\000", 0, 0); + x2("\000a\000+\000|\000b\000*\000\000", "\000b\000b\000b\000\000", 0, 6); + x2("\000a\000+\000|\000b\000*\000\000", "\000a\000b\000b\000b\000\000", 0, 2); + n("\000a\000+\000|\000b\000+\000\000", "\000\000"); + x2("\000(\000a\000|\000b\000)\000?\000\000", "\000b\000\000", 0, 2); + x2("\000(\000a\000|\000b\000)\000*\000\000", "\000b\000a\000\000", 0, 4); + x2("\000(\000a\000|\000b\000)\000+\000\000", "\000b\000a\000b\000\000", 0, 6); + x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000c\000a\000a\000b\000b\000c\000\000", 0, 8); + x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000a\000b\000c\000a\000\000", 2, 10); + x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000b\000z\000c\000a\000\000", 0, 4); + x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 10); + x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000\000", 2, 4); + x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000a\000a\000b\000a\000\000", 2, 8); + x2("\000(\000?\000:\000a\000|\000b\000)\000(\000?\000:\000a\000|\000b\000)\000\000", "\000a\000b\000\000", 0, 4); + x2("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000*\000|\000b\000*\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 6); + x2("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000+\000|\000b\000+\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); + x2("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); + x2("\000h\000{\0000\000,\000}\000\000", "\000h\000h\000h\000h\000\000", 0, 8); + x2("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0001\000,\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); + n("\000a\000x\000{\0002\000}\000*\000a\000\000", "\0000\000a\000x\000x\000x\000a\0001\000\000"); + n("\000a\000.\000{\0000\000,\0002\000}\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000"); + n("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000"); + n("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000X\000a\0000\000\000"); + x2("\000^\000a\000{\0002\000,\000}\000?\000a\000$\000\000", "\000a\000a\000a\000\000", 0, 6); + x2("\000^\000[\000a\000-\000z\000]\000{\0002\000,\000}\000?\000$\000\000", "\000a\000a\000a\000\000", 0, 6); + x2("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000c\000c\000\000", 0, 4); + n("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000a\000b\000c\000c\000\000"); + x2("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000a\000b\000c\000\000", 12, 16); + x2("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000b\000c\000\000", 0, 14); + x2("\000a\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2); + x2("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000C\000\000", 0, 2); + x2("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000A\000\000", 0, 2); + x2("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000C\000\000", 0, 2); + n("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000A\000\000"); + x2("\000[\000a\000b\000c\000]\000?\000\000", "\000a\000b\000c\000\000", 0, 2); + x2("\000[\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 6); + x2("\000[\000^\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 0); + n("\000[\000^\000a\000b\000c\000]\000+\000\000", "\000a\000b\000c\000\000"); + x2("\000a\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0); + x2("\000b\000a\000?\000?\000b\000\000", "\000b\000a\000b\000\000", 0, 6); + x2("\000a\000*\000?\000\000", "\000a\000a\000a\000\000", 0, 0); + x2("\000b\000a\000*\000?\000\000", "\000b\000a\000a\000\000", 0, 2); + x2("\000b\000a\000*\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8); + x2("\000a\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2); + x2("\000b\000a\000+\000?\000\000", "\000b\000a\000a\000\000", 0, 4); + x2("\000b\000a\000+\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8); + x2("\000(\000?\000:\000a\000?\000)\000?\000?\000\000", "\000a\000\000", 0, 0); + x2("\000(\000?\000:\000a\000?\000?\000)\000?\000\000", "\000a\000\000", 0, 0); + x2("\000(\000?\000:\000a\000?\000)\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2); + x2("\000(\000?\000:\000a\000+\000)\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0); + x2("\000(\000?\000:\000a\000+\000)\000?\000?\000b\000\000", "\000a\000a\000a\000b\000\000", 0, 8); + x2("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 8); + x2("\000(\000?\000:\000a\000b\000)\000*\000{\0000\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 0); + x2("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16); + n("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000\000"); + x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000\000", 0, 12); + x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16); + x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000?\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 8); + x2("\000(\000?\000:\000a\000b\000)\000{\000,\000}\000\000", "\000a\000b\000{\000,\000}\000\000", 0, 10); + x2("\000(\000?\000:\000a\000b\000c\000)\000+\000?\000{\0002\000}\000\000", "\000a\000b\000c\000a\000b\000c\000a\000b\000c\000\000", 0, 12); + x2("\000(\000?\000:\000X\000*\000)\000(\000?\000i\000:\000x\000a\000)\000\000", "\000X\000X\000X\000a\000\000", 0, 8); + x2("\000(\000d\000+\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); + x2("\000(\000[\000^\000a\000b\000c\000]\000*\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); + x2("\000(\000\134\000w\000+\000)\000(\000\134\000w\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); + x3("\000(\000a\000)\000\000", "\000a\000\000", 0, 2, 1); + x3("\000(\000a\000b\000)\000\000", "\000a\000b\000\000", 0, 4, 1); + x2("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4); + x3("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 1); + x3("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 2); + x3("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000a\000b\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 20); + x3("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 0, 4, 1); + x3("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 4, 8, 2); + x3("\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 3); + x3("\000(\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000)\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 4); + x2("\000(\000^\000a\000)\000\000", "\000a\000\000", 0, 2); + x3("\000(\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 1); + x3("\000(\000^\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 2); + x3("\000(\000a\000?\000)\000\000", "\000a\000a\000a\000\000", 0, 2, 1); + x3("\000(\000a\000*\000)\000\000", "\000a\000a\000a\000\000", 0, 6, 1); + x3("\000(\000a\000*\000)\000\000", "\000\000", 0, 0, 1); + x3("\000(\000a\000+\000)\000\000", "\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 14, 1); + x3("\000(\000a\000+\000|\000b\000*\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 6, 1); + x3("\000(\000a\000+\000|\000b\000?\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 2, 1); + x3("\000(\000a\000b\000c\000)\000?\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000a\000b\000c\000)\000*\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000x\000y\000z\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000[\000x\000y\000z\000]\000[\000a\000b\000c\000]\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000(\000?\000i\000:\000a\000b\000c\000)\000)\000\000", "\000A\000b\000C\000\000", 0, 6, 1); + x2("\000(\000a\000b\000c\000)\000(\000?\000i\000:\000\134\0001\000)\000\000", "\000a\000b\000c\000A\000B\000C\000\000", 0, 12); + x3("\000(\000(\000?\000m\000:\000a\000.\000c\000)\000)\000\000", "\000a\000\012\000c\000\000", 0, 6, 1); + x3("\000(\000(\000?\000=\000a\000z\000)\000a\000)\000\000", "\000a\000z\000b\000\000", 0, 2, 1); + x3("\000a\000b\000c\000|\000(\000.\000a\000b\000d\000)\000\000", "\000z\000a\000b\000d\000\000", 0, 8, 1); + x2("\000(\000?\000:\000a\000b\000c\000)\000|\000(\000A\000B\000C\000)\000\000", "\000a\000b\000c\000\000", 0, 6); + x3("\000(\000?\000i\000:\000(\000a\000b\000c\000)\000)\000|\000(\000z\000z\000z\000)\000\000", "\000A\000B\000C\000\000", 0, 6, 1); + x3("\000a\000*\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 8, 10, 1); + x3("\000a\000*\000?\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 0, 2, 1); + x3("\000a\000*\000?\000(\000c\000)\000\000", "\000a\000a\000a\000a\000c\000\000", 8, 10, 1); + x3("\000[\000b\000c\000d\000]\000a\000*\000(\000.\000)\000\000", "\000c\000a\000a\000a\000a\000z\000\000", 10, 12, 1); + x3("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1); + n("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000"); + x3("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1); + n("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000"); + x3("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000\000", 4, 8, 1); + n("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000b\000\000"); + n("\000(\000\134\0001\000)\000\000", "\000\000"); + n("\000\134\0001\000(\000a\000)\000\000", "\000a\000a\000\000"); + n("\000(\000a\000(\000b\000)\000\134\0001\000)\000\134\0002\000+\000\000", "\000a\000b\000a\000b\000b\000\000"); + n("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000\000"); + x2("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000a\000\000", 0, 8); + x2("\000(\000a\000)\000(\000?\000=\000\134\0001\000)\000\000", "\000a\000a\000\000", 0, 2); + n("\000(\000a\000)\000$\000|\000\134\0001\000\000", "\000a\000z\000\000"); + x2("\000(\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); + n("\000(\000a\000)\000\134\0001\000\000", "\000a\000b\000\000"); + x2("\000(\000a\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); + x2("\000(\000a\000?\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 0); + x2("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 8); + x3("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 4, 1); + x2("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000b\000b\000b\000\000", 0, 10); + x2("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000\000", 0, 2); + x2("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0001\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000a\000a\000a\000b\000b\000\000", 0, 20); + x2("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000b\000b\000\000", 0, 14); + x2("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 16); + x3("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 6, 7); + x2("\000(\000a\000)\000(\000b\000)\000(\000c\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "\000a\000b\000c\000b\000a\000c\000\000", 0, 12); + x2("\000(\000[\000a\000-\000d\000]\000)\000\134\0001\000\000", "\000c\000c\000\000", 0, 4); + x2("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000 \000\000", 0, 12); + n("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000\000"); + x2("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000w\000h\000o\000w\000h\000o\000\000", 0, 12); + x2("\000.\000.\000.\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000a\000b\000c\000w\000h\000o\000w\000h\000o\000\000", 0, 18); + x2("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000c\000b\000c\000c\000b\000c\000\000", 0, 12); + x2("\000(\000^\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); + n("\000(\000^\000a\000)\000\134\0001\000\000", "\000b\000a\000a\000\000"); + n("\000(\000a\000$\000)\000\134\0001\000\000", "\000a\000a\000\000"); + n("\000(\000a\000b\000\134\000Z\000)\000\134\0001\000\000", "\000a\000b\000\000"); + x2("\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000a\000\000", 2, 2); + x2("\000.\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000b\000a\000\000", 2, 4); + x3("\000(\000.\000(\000a\000b\000c\000)\000\134\0002\000)\000\000", "\000z\000a\000b\000c\000a\000b\000c\000\000", 0, 14, 1); + x3("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "\000z\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1); + x2("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000A\000z\000\000", 0, 8); + n("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000a\000z\000\000"); + x2("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000a\000b\000\000", 2, 4); + n("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000b\000b\000\000"); + x2("\000(\000?\000<\000=\000a\000|\000b\000)\000b\000\000", "\000b\000b\000\000", 2, 4); + x2("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000b\000c\000b\000\000", 4, 6); + x2("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000a\000b\000\000", 2, 4); + x2("\000(\000?\000<\000=\000a\000|\000b\000c\000|\000|\000d\000e\000f\000g\000h\000i\000j\000|\000k\000l\000m\000n\000o\000p\000q\000|\000r\000)\000z\000\000", "\000r\000z\000\000", 2, 4); + x2("\000(\000a\000)\000\134\000g\000<\0001\000>\000\000", "\000a\000a\000\000", 0, 4); + x2("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000c\000b\000\000", 2, 4); + n("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000a\000b\000\000"); + x2("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000b\000\000", "\000b\000b\000b\000\000", 0, 2); + n("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000z\000\000", "\000b\000c\000z\000\000"); + x2("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000a\000)\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000<\000n\000a\000m\000e\000_\0002\000>\000a\000b\000)\000\134\000g\000<\000n\000a\000m\000e\000_\0002\000>\000\000", "\000a\000b\000a\000b\000\000", 0, 8); + x2("\000(\000?\000<\000n\000a\000m\000e\000_\0003\000>\000.\000z\000v\000.\000)\000\134\000k\000<\000n\000a\000m\000e\000_\0003\000>\000\000", "\000a\000z\000v\000b\000a\000z\000v\000b\000\000", 0, 16); + x2("\000(\000?\000<\000=\000\134\000g\000<\000a\000b\000>\000)\000|\000-\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000a\000b\000>\000X\000y\000Z\000)\000\000", "\000X\000y\000Z\000\000", 6, 6); + x2("\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000n\000>\000)\000+\000\000", "\000\000", 0, 0); + x2("\000(\000?\000<\000n\000>\000|\000\134\000(\000\134\000g\000<\000n\000>\000\134\000)\000)\000+\000$\000\000", "\000(\000)\000(\000(\000)\000)\000\000", 0, 12); + x3("\000\134\000g\000<\000n\000>\000(\000?\000<\000n\000>\000.\000)\000{\0000\000}\000\000", "\000X\000\000", 0, 2, 1); + x2("\000\134\000g\000<\000n\000>\000(\000a\000b\000c\000|\000d\000f\000(\000?\000<\000n\000>\000.\000Y\000Z\000)\000{\0002\000,\0008\000}\000)\000{\0000\000}\000\000", "\000X\000Y\000Z\000\000", 0, 6); + x2("\000\134\000A\000(\000?\000<\000n\000>\000(\000a\000\134\000g\000<\000n\000>\000)\000|\000)\000\134\000z\000\000", "\000a\000a\000a\000a\000\000", 0, 8); + x2("\000(\000?\000<\000n\000>\000|\000\134\000g\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000a\000|\000(\000b\000)\000\134\000g\000<\000m\000>\000)\000\000", "\000b\000b\000b\000b\000a\000b\000b\000a\000\000", 0, 16); + x2("\000(\000?\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\134\000w\000+\000\134\000s\000x\000)\000a\000+\000\134\000k\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\000", "\000 \000 \000f\000g\000 \000x\000a\000a\000a\000a\000a\000a\000a\000a\000f\000g\000 \000x\000\000", 4, 36); + x3("\000(\000z\000)\000(\000)\000(\000)\000(\000?\000<\000_\0009\000>\000a\000)\000\134\000g\000<\000_\0009\000>\000\000", "\000z\000a\000a\000\000", 4, 6, 1); + x2("\000(\000.\000)\000(\000(\000(\000?\000<\000_\000>\000a\000)\000)\000)\000\134\000k\000<\000_\000>\000\000", "\000z\000a\000a\000\000", 0, 6); + x2("\000(\000(\000?\000<\000n\000a\000m\000e\0001\000>\000\134\000d\000)\000|\000(\000?\000<\000n\000a\000m\000e\0002\000>\000\134\000w\000)\000)\000(\000\134\000k\000<\000n\000a\000m\000e\0001\000>\000|\000\134\000k\000<\000n\000a\000m\000e\0002\000>\000)\000\000", "\000f\000f\000\000", 0, 4); + x2("\000(\000?\000:\000(\000?\000<\000x\000>\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000e\000f\000g\000\000", 6, 18); + n("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000\000"); + x2("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000a\000-\000p\000y\000u\000m\000p\000y\000u\000m\000\000", 4, 20); + x3("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000x\000x\000x\000x\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000\000", 8, 36, 14); + x3("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0006\000>\000)\000(\000?\000<\000n\000a\000m\000e\0007\000>\000)\000(\000?\000<\000n\000a\000m\000e\0008\000>\000)\000(\000?\000<\000n\000a\000m\000e\0009\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0000\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0006\000>\000a\000a\000a\000)\000(\000?\000<\000n\000a\000m\000e\0001\0007\000>\000)\000$\000\000", "\000a\000a\000a\000\000", 0, 6, 16); + x2("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000\000", 0, 26); + x3("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000)\000)\000\000", 0, 34, 1); + x2("\000\134\000g\000<\000b\000a\000r\000>\000|\000\134\000z\000E\000N\000D\000(\000?\000<\000b\000a\000r\000>\000.\000*\000a\000b\000c\000$\000)\000\000", "\000a\000b\000c\000x\000x\000x\000a\000b\000c\000\000", 0, 18); + x2("\000\134\000g\000<\0001\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000\000", "\000b\000a\000c\000\000", 0, 6); + x3("\000\134\000g\000<\000_\000A\000>\000\134\000g\000<\000_\000A\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000(\000?\000<\000_\000A\000>\000.\000b\000.\000)\000\000", "\000x\000b\000x\000y\000b\000y\000\000", 6, 12, 1); + x2("\000\134\000A\000(\000?\000:\000\134\000g\000<\000p\000o\000n\000>\000|\000\134\000g\000<\000p\000a\000n\000>\000|\000\134\000z\000E\000N\000D\000 \000 \000(\000?\000<\000p\000a\000n\000>\000a\000|\000c\000\134\000g\000<\000p\000o\000n\000>\000c\000)\000(\000?\000<\000p\000o\000n\000>\000b\000|\000d\000\134\000g\000<\000p\000a\000n\000>\000d\000)\000)\000$\000\000", "\000c\000d\000c\000b\000c\000d\000c\000\000", 0, 14); + x2("\000\134\000A\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000m\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\000", "\000a\000a\000a\000a\000\000", 0, 8); + x2("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000\000", 2, 10); + x2("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000a\000a\000a\000a\000\000", 0, 20); + x2("\000(\000?\000<\000p\000a\000r\000e\000>\000\134\000(\000(\000[\000^\000\134\000(\000\134\000)\000]\000+\000+\000|\000\134\000g\000<\000p\000a\000r\000e\000>\000)\000*\000+\000\134\000)\000)\000\000", "\000(\000(\000a\000)\000)\000\000", 0, 10); + x2("\000(\000)\000*\000\134\0001\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\000(\000)\000|\000(\000)\000)\000*\000\134\0001\000\134\0002\000\000", "\000\000", 0, 0); + x3("\000(\000?\000:\000\134\0001\000a\000|\000(\000)\000)\000*\000\000", "\000a\000\000", 0, 0, 1); + x2("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000\000", "\0000\000x\0001\000x\0002\000x\0003\000\000", 2, 12); + x2("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000(\000?\000i\000:\000\134\0001\000)\000\134\000Z\000\000", "\0000\000x\0001\000x\0002\000x\0001\000X\0002\000\000", 2, 18); + x2("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000\134\0005\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000x\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000b\000\134\0005\000\000", "\000b\000\000", 0, 2); + x2("\217\372\000\000", "\217\372\000\000", 0, 2); + x2("\000\000", "0B\000\000", 0, 0); + x2("0B\000\000", "0B\000\000", 0, 2); + n("0D\000\000", "0B\000\000"); + x2("0F0F\000\000", "0F0F\000\000", 0, 4); + x2("0B0D0F\000\000", "0B0D0F\000\000", 0, 6); + x2("0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", "0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", 0, 70); + x2("0B\000\000", "0D0B\000\000", 2, 4); + x2("0D0F\000\000", "0B0D0F\000\000", 2, 6); + x2("e\207\000\000", "e\207\000\000", 0, 2); + x2("\000.\000\000", "0B\000\000", 0, 2); + x2("\000.\000.\000\000", "0K0M\000\000", 0, 4); + x2("\000\134\000w\000\000", "0J\000\000", 0, 2); + n("\000\134\000W\000\000", "0B\000\000"); + x2("\000[\000\134\000W\000]\000\000", "0F\000$\000\000", 2, 4); + x2("\000\134\000S\000\000", "0]\000\000", 0, 2); + x2("\000\134\000S\000\000", "o\042\000\000", 0, 2); + x2("\000\134\000b\000\000", "l\027\000 \000\000", 0, 0); + x2("\000\134\000b\000\000", "\000 0{\000\000", 2, 2); + x2("\000\134\000B\000\000", "0[0]\000 \000\000", 2, 2); + x2("\000\134\000B\000\000", "0F\000 \000\000", 4, 4); + x2("\000\134\000B\000\000", "\000 0D\000\000", 0, 0); + x2("\000[0_0a\000]\000\000", "0a\000\000", 0, 2); + n("\000[0j0k\000]\000\000", "0l\000\000"); + x2("\000[0F\000-0J\000]\000\000", "0H\000\000", 0, 2); + n("\000[\000^0Q\000]\000\000", "0Q\000\000"); + x2("\000[\000\134\000w\000]\000\000", "0m\000\000", 0, 2); + n("\000[\000\134\000d\000]\000\000", "0u\000\000"); + x2("\000[\000\134\000D\000]\000\000", "0o\000\000", 0, 2); + n("\000[\000\134\000s\000]\000\000", "0O\000\000"); + x2("\000[\000\134\000S\000]\000\000", "0x\000\000", 0, 2); + x2("\000[\000\134\000w\000\134\000d\000]\000\000", "0\210\000\000", 0, 2); + x2("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000 \000 0\210\000\000", 6, 8); + n("\000\134\000w\233<\216\312\000\000", "\000 \233<\216\312\000\000"); + x2("\233<\000\134\000W\216\312\000\000", "\233<\000 \216\312\000\000", 0, 6); + x2("0B\000.0D\000.0F\000\000", "0B0B0D0D0F\000\000", 0, 10); + x2("\000.\000\134\000w0F\000\134\000W\000.\000.0^\000\000", "0H0F0F\000 0F0^0^\000\000", 0, 14); + x2("\000\134\000s\000\134\000w0S0S0S\000\000", "\000 0S0S0S0S\000\000", 0, 10); + x2("0B0B\000.0Q\000\000", "0B0B0Q0Q\000\000", 0, 8); + n("\000.0D\000\000", "0D0H\000\000"); + x2("\000.0J\000\000", "0J0J\000\000", 0, 4); + x2("\000^0B\000\000", "0B\000\000", 0, 2); + x2("\000^0\200\000$\000\000", "0\200\000\000", 0, 2); + x2("\000^\000\134\000w\000$\000\000", "0k\000\000", 0, 2); + x2("\000^\000\134\000w0K0M0O0Q0S\000$\000\000", "\000z0K0M0O0Q0S\000\000", 0, 12); + x2("\000^\000\134\000w\000.\000.\000.0F0H0J\000$\000\000", "\000z0B0D0F0F0H0J\000\000", 0, 14); + x2("\000\134\000w\000\134\000w\000\134\000s\000\134\000W0J0J0J\000\134\000d\000\000", "\000a0J\000 \000 0J0J0J\0004\000\000", 0, 16); + x2("\000\134\000A0_0a0d\000\000", "0_0a0d\000\000", 0, 6); + x2("0\2000\2010\202\000\134\000Z\000\000", "0\2000\2010\202\000\000", 0, 6); + x2("0K0M0O\000\134\000z\000\000", "0K0M0O\000\000", 0, 6); + x2("0K0M0O\000\134\000Z\000\000", "0K0M0O\000\012\000\000", 0, 6); + x2("\000\134\000G0}0t\000\000", "0}0t\000\000", 0, 4); + n("\000\134\000G0H\000\000", "0F0H0J\000\000"); + n("0h0f\000\134\000G\000\000", "0h0f\000\000"); + n("0~0\177\000\134\000A\000\000", "0~0\177\000\000"); + n("0~\000\134\000A0\177\000\000", "0~0\177\000\000"); + x2("\000(\000?\000=0[\000)0[\000\000", "0[\000\000", 0, 2); + n("\000(\000?\000=0F\000)\000.\000\000", "0D\000\000"); + x2("\000(\000?\000!0F\000)0K\000\000", "0K\000\000", 0, 2); + n("\000(\000?\000!0h\000)0B\000\000", "0h\000\000"); + x2("\000(\000?\000i\000:0B\000)\000\000", "0B\000\000", 0, 2); + x2("\000(\000?\000i\000:0v0y\000)\000\000", "0v0y\000\000", 0, 4); + n("\000(\000?\000i\000:0D\000)\000\000", "0F\000\000"); + x2("\000(\000?\000m\000:0\210\000.\000)\000\000", "0\210\000\012\000\000", 0, 4); + x2("\000(\000?\000m\000:\000.0\201\000)\000\000", "0~\000\0120\201\000\000", 2, 6); + x2("0B\000?\000\000", "\000\000", 0, 0); + x2("Y\011\000?\000\000", "S\026\000\000", 0, 0); + x2("Y\011\000?\000\000", "Y\011\000\000", 0, 2); + x2("\221\317\000*\000\000", "\000\000", 0, 0); + x2("\221\317\000*\000\000", "\221\317\000\000", 0, 2); + x2("[P\000*\000\000", "[P[P[P\000\000", 0, 6); + x2("\231\254\000*\000\000", "\236\177\231\254\231\254\231\254\231\254\000\000", 0, 0); + n("\134q\000+\000\000", "\000\000"); + x2("l\263\000+\000\000", "l\263\000\000", 0, 2); + x2("fB\000+\000\000", "fBfBfBfB\000\000", 0, 8); + x2("0H\000+\000\000", "0H0H0F0F0F\000\000", 0, 4); + x2("0F\000+\000\000", "0J0F0F0F0F\000\000", 2, 10); + x2("\000.\000?\000\000", "0_\000\000", 0, 2); + x2("\000.\000*\000\000", "0q0t0w0z\000\000", 0, 8); + x2("\000.\000+\000\000", "0\215\000\000", 0, 2); + x2("\000.\000+\000\000", "0D0F0H0K\000\012\000\000", 0, 8); + x2("0B\000|0D\000\000", "0B\000\000", 0, 2); + x2("0B\000|0D\000\000", "0D\000\000", 0, 2); + x2("0B0D\000|0D0F\000\000", "0B0D\000\000", 0, 4); + x2("0B0D\000|0D0F\000\000", "0D0F\000\000", 0, 4); + x2("0\222\000(\000?\000:0K0M\000|0M0O\000)\000\000", "0\2220K0M\000\000", 0, 6); + x2("0\222\000(\000?\000:0K0M\000|0M0O\000)0Q\000\000", "0\2220M0O0Q\000\000", 0, 8); + x2("0B0D\000|\000(\000?\000:0B0F\000|0B0\222\000)\000\000", "0B0\222\000\000", 0, 4); + x2("0B\000|0D\000|0F\000\000", "0H0F\000\000", 2, 4); + x2("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0W0Y0[\000\000", 0, 6); + n("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0Y0[\000\000"); + x2("0B\000|\000^0\217\000\000", "0v0B\000\000", 2, 4); + x2("0B\000|\000^0\222\000\000", "0\2220B\000\000", 0, 2); + x2("\233<\000|\000\134\000G\216\312\000\000", "0Q\216\312\233<\000\000", 4, 6); + x2("\233<\000|\000\134\000G\216\312\000\000", "\216\312\233<\000\000", 0, 2); + x2("\233<\000|\000\134\000A\216\312\000\000", "\000b\216\312\233<\000\000", 4, 6); + x2("\233<\000|\000\134\000A\216\312\000\000", "\216\312\000\000", 0, 2); + x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\233<\000\000", 2, 4); + x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\000", 0, 2); + x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\012\000\000", 0, 2); + x2("\233<\000|\216\312\000\134\000z\000\000", "\216\312\233<\000\000", 2, 4); + x2("\233<\000|\216\312\000\134\000z\000\000", "\216\312\000\000", 0, 2); + x2("\000\134\000w\000|\000\134\000s\000\000", "0J\000\000", 0, 2); + x2("\000\134\000w\000|\000%\000\000", "\000%0J\000\000", 0, 2); + x2("\000\134\000w\000|\000[\000&\000$\000]\000\000", "0F\000&\000\000", 0, 2); + x2("\000[0D\000-0Q\000]\000\000", "0F\000\000", 0, 2); + x2("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0B\000\000", 0, 2); + x2("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0K\000\000", 0, 2); + x2("\000[\000^0B\000]\000\000", "\000\012\000\000", 0, 2); + x2("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0F0\222\000\000", 0, 2); + x2("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0D0\222\000\000", 0, 4); + x2("0B0D0F\000|\000(\000?\000=0Q0Q\000)\000.\000.0{\000\000", "0Q0Q0{\000\000", 0, 6); + x2("0B0D0F\000|\000(\000?\000!0Q0Q\000)\000.\000.0{\000\000", "0B0D0{\000\000", 0, 6); + x2("\000(\000?\000=0\2220B\000)\000.\000.0B\000|\000(\000?\000=0\2220\222\000)\000.\000.0B\000\000", "0\2220\2220B\000\000", 0, 6); + x2("\000(\000?\000<\000=0B\000|0D0F\000)0D\000\000", "0D0F0D\000\000", 4, 6); + n("\000(\000?\000>0B\000|0B0D0H\000)0F\000\000", "0B0D0H0F\000\000"); + x2("\000(\000?\000>0B0D0H\000|0B\000)0F\000\000", "0B0D0H0F\000\000", 0, 8); + x2("0B\000?\000|0D\000\000", "0B\000\000", 0, 2); + x2("0B\000?\000|0D\000\000", "0D\000\000", 0, 0); + x2("0B\000?\000|0D\000\000", "\000\000", 0, 0); + x2("0B\000*\000|0D\000\000", "0B0B\000\000", 0, 4); + x2("0B\000*\000|0D\000*\000\000", "0D0B\000\000", 0, 0); + x2("0B\000*\000|0D\000*\000\000", "0B0D\000\000", 0, 2); + x2("\000[\000a0B\000]\000*\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 4); + x2("0B\000+\000|0D\000*\000\000", "\000\000", 0, 0); + x2("0B\000+\000|0D\000*\000\000", "0D0D0D\000\000", 0, 6); + x2("0B\000+\000|0D\000*\000\000", "0B0D0D0D\000\000", 0, 2); + x2("0B\000+\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 0); + n("0B\000+\000|0D\000+\000\000", "\000\000"); + x2("\000(0B\000|0D\000)\000?\000\000", "0D\000\000", 0, 2); + x2("\000(0B\000|0D\000)\000*\000\000", "0D0B\000\000", 0, 4); + x2("\000(0B\000|0D\000)\000+\000\000", "0D0B0D\000\000", 0, 6); + x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 0, 8); + x2("\000(0B0D\000|0F0H\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 4, 12); + x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0B0D0F0B\000\000", 2, 10); + x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0D0\2220F0B\000\000", 0, 4); + x2("\000(0B0D\000|0F0B\000)\000+\000\000", "\000$\000$\000z\000z\000z\000z0B0D0\2220F0B\000\000", 12, 16); + x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0B0D0B0D0B\000\000", 0, 10); + x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B\000\000", 2, 4); + x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B0B0B0D0B\000\000", 2, 8); + x2("\000(\000?\000:0B\000|0D\000)\000(\000?\000:0B\000|0D\000)\000\000", "0B0D\000\000", 0, 4); + x2("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000*\000|0D\000*\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 6); + x2("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000+\000|0D\000+\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 12); + x2("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12); + x2("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0001\000,\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12); + x2("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0F0F\000\000", 0, 4); + n("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0B0D0F0F\000\000"); + x2("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0B0D0F\000\000", 12, 16); + x2("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0D0F\000\000", 0, 14); + x2("0F\000{\0000\000,\000}\000\000", "0F0F0F0F\000\000", 0, 8); + x2("0B\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2); + x2("\000(\000?\000i\000)\000c\000|0B\000\000", "\000C\000\000", 0, 2); + x2("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000a\000\000", 0, 2); + n("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000A\000\000"); + x2("\000[0B0D0F\000]\000?\000\000", "0B0D0F\000\000", 0, 2); + x2("\000[0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 6); + x2("\000[\000^0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 0); + n("\000[\000^0B0D0F\000]\000+\000\000", "0B0D0F\000\000"); + x2("0B\000?\000?\000\000", "0B0B0B\000\000", 0, 0); + x2("0D0B\000?\000?0D\000\000", "0D0B0D\000\000", 0, 6); + x2("0B\000*\000?\000\000", "0B0B0B\000\000", 0, 0); + x2("0D0B\000*\000?\000\000", "0D0B0B\000\000", 0, 2); + x2("0D0B\000*\000?0D\000\000", "0D0B0B0D\000\000", 0, 8); + x2("0B\000+\000?\000\000", "0B0B0B\000\000", 0, 2); + x2("0D0B\000+\000?\000\000", "0D0B0B\000\000", 0, 4); + x2("0D0B\000+\000?0D\000\000", "0D0B0B0D\000\000", 0, 8); + x2("\000(\000?\000:Y)\000?\000)\000?\000?\000\000", "Y)\000\000", 0, 0); + x2("\000(\000?\000:Y)\000?\000?\000)\000?\000\000", "Y)\000\000", 0, 0); + x2("\000(\000?\000:Y\042\000?\000)\000+\000?\000\000", "Y\042Y\042Y\042\000\000", 0, 2); + x2("\000(\000?\000:\230\250\000+\000)\000?\000?\000\000", "\230\250\230\250\230\250\000\000", 0, 0); + x2("\000(\000?\000:\226\352\000+\000)\000?\000?\227\034\000\000", "\226\352\226\352\226\352\227\034\000\000", 0, 8); + x2("\000(\000?\000:0B0D\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\233<\216\312\000)\000?\000{\0002\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 8); + x2("\000(\000?\000:\233<\216\312\000)\000*\000{\0000\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 0); + x2("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16); + n("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\000\000"); + x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 12); + x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16); + x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000?\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 8); + x2("\000(\000?\000:\233<\216\312\000)\000{\000,\000}\000\000", "\233<\216\312\000{\000,\000}\000\000", 0, 10); + x2("\000(\000?\000:0K0M0O\000)\000+\000?\000{\0002\000}\000\000", "0K0M0O0K0M0O0K0M0O\000\000", 0, 12); + x3("\000(pk\000)\000\000", "pk\000\000", 0, 2, 1); + x3("\000(pkl4\000)\000\000", "pkl4\000\000", 0, 4, 1); + x2("\000(\000(fB\225\223\000)\000)\000\000", "fB\225\223\000\000", 0, 4); + x3("\000(\000(\230\250l4\000)\000)\000\000", "\230\250l4\000\000", 0, 4, 1); + x3("\000(\000(f(e\345\000)\000)\000\000", "f(e\345\000\000", 0, 4, 2); + x3("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\221\317[P\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\221\317[P\000\000", 0, 4, 20); + x3("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 0, 4, 1); + x3("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 4, 8, 2); + x3("\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 3); + x3("\000(\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000)\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 4); + x3("\000.\000*\000(0\3250\251\000)0\3630\3730\336\000(0\363\000(\000)0\2670\3450\277\000)0\2440\363\000\000", "0\3250\2510\3630\3730\3360\3630\2670\3450\2770\2440\363\000\000", 10, 18, 2); + x2("\000(\000^0B\000)\000\000", "0B\000\000", 0, 2); + x3("\000(0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 1); + x3("\000(\000^0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 2); + x3("\000(0B\000?\000)\000\000", "0B0B0B\000\000", 0, 2, 1); + x3("\000(0~\000*\000)\000\000", "0~0~0~\000\000", 0, 6, 1); + x3("\000(0h\000*\000)\000\000", "\000\000", 0, 0, 1); + x3("\000(0\213\000+\000)\000\000", "0\2130\2130\2130\2130\2130\2130\213\000\000", 0, 14, 1); + x3("\000(0u\000+\000|0x\000*\000)\000\000", "0u0u0u0x0x\000\000", 0, 6, 1); + x3("\000(0B\000+\000|0D\000?\000)\000\000", "0D0D0D0B0B\000\000", 0, 2, 1); + x3("\000(0B0D0F\000)\000?\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(0B0D0F\000)\000*\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(0U0W0Y\000|0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(\000[0j0k0l\000]\000[0K0M0O\000]\000|0K0M0O\000)\000+\000\000", "0K0M0O\000\000", 0, 6, 1); + x3("\000(\000(\000?\000i\000:0B0D0F\000)\000)\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(\000(\000?\000m\000:0B\000.0F\000)\000)\000\000", "0B\000\0120F\000\000", 0, 6, 1); + x3("\000(\000(\000?\000=0B0\223\000)0B\000)\000\000", "0B0\2230D\000\000", 0, 2, 1); + x3("0B0D0F\000|\000(\000.0B0D0H\000)\000\000", "0\2230B0D0H\000\000", 0, 8, 1); + x3("0B\000*\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1); + x3("0B\000*\000?\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 0, 2, 1); + x3("0B\000*\000?\000(0\223\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1); + x3("\000[0D0F0H\000]0B\000*\000(\000.\000)\000\000", "0H0B0B0B0B0\223\000\000", 10, 12, 1); + x3("\000(\000\134\000A0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1); + n("\000(\000\134\000A0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000"); + x3("\000(\000^0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1); + n("\000(\000^0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000"); + x3("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\213\000\000", 4, 8, 1); + n("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\2130\213\000\000"); + x2("\000(q!\000)\000\134\0001\000\000", "q!q!\000\000", 0, 4); + n("\000(q!\000)\000\134\0001\000\000", "q!kf\000\000"); + x2("\000(zz\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 4); + x2("\000(zz\000?\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 0); + x2("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 8); + x3("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 4, 1); + x2("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D0D0D0D\000\000", 0, 10); + x2("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D\000\000", 0, 2); + x2("\000(0B\000*\000)\000(0D\000*\000)\000\134\0001\000\134\0002\000\000", "0B0B0B0D0D0B0B0B0D0D\000\000", 0, 20); + x2("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 0, 14); + x3("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 6, 10, 2); + x2("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 16); + x3("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 6, 7); + x2("\000(0o\000)\000(0r\000)\000(0u\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "0o0r0u0r0o0u\000\000", 0, 12); + x2("\000(\000[0M\000-0Q\000]\000)\000\134\0001\000\000", "0O0O\000\000", 0, 4); + x2("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000 \000\000", 0, 12); + n("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000\000"); + x2("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "\212\260\377\037\212\260\377\037\000\000", 0, 8); + x2("\000.\000.\000.\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0B\000a0B\212\260\377\037\212\260\377\037\000\000", 0, 14); + x2("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0F0D0F0F0D0F\000\000", 0, 12); + x2("\000(\000^0S\000)\000\134\0001\000\000", "0S0S\000\000", 0, 4); + n("\000(\000^0\200\000)\000\134\0001\000\000", "0\2010\2000\200\000\000"); + n("\000(0B\000$\000)\000\134\0001\000\000", "0B0B\000\000"); + n("\000(0B0D\000\134\000Z\000)\000\134\0001\000\000", "0B0D\000\000"); + x2("\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0B\000\000", 2, 2); + x2("\000.\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0D0B\000\000", 2, 4); + x3("\000(\000.\000(0\2040D0\206\000)\000\134\0002\000)\000\000", "\000z0\2040D0\2060\2040D0\206\000\000", 0, 14, 1); + x3("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "0B\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1); + x2("\000(\000(\000?\000i\000:0B\000v0Z\000)\000)\000\134\0001\000\000", "0B\000v0Z0B\000v0Z\000\000", 0, 12); + x2("\000(\000?\000Y\011\000|\000\134\000(\000\134\000g\000\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(Y\011\000)\000)\000)\000)\000)\000)\000\000", 0, 26); + x2("\000\134\000A\000(\000?\000:\000\134\000g\000<\226?\000_\0001\000>\000|\000\134\000g\000\000|\000\134\000z}BN\206\000 \000 \000(\000?\000<\226?\000_\0001\000>\211\263\000|\201\352\000\134\000g\000\201\352\000)\000(\000?\000W(\000|\203\351\205\251\000\134\000g\000<\226?\000_\0001\000>\203\351\205\251\000)\000)\000$\000\000", "\203\351\205\251\201\352\203\351\205\251\201\352W(\201\352\203\351\205\251\201\352\203\351\205\251\000\000", 0, 26); + x2("\000[\000[0r0u\000]\000]\000\000", "0u\000\000", 0, 2); + x2("\000[\000[0D0J0F\000]0K\000]\000\000", "0K\000\000", 0, 2); + n("\000[\000[\000^0B\000]\000]\000\000", "0B\000\000"); + n("\000[\000^\000[0B\000]\000]\000\000", "0B\000\000"); + x2("\000[\000^\000[\000^0B\000]\000]\000\000", "0B\000\000", 0, 2); + x2("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0O\000\000", 0, 2); + n("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0K\000\000"); + n("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0Q\000\000"); + x2("\000[0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000", 0, 2); + n("\000[\000^0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000"); + x2("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0D\000\000", 0, 2); + n("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0B\000\000"); + x2("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0M\000\000", 0, 2); + n("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0D\000\000"); + x2("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0F\000\000", 0, 2); + x2("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0H\000\000", 0, 2); + n("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0K\000\000"); + x2("\000[0B\000-\000&\000&\000-0B\000]\000\000", "\000-\000\000", 0, 2); + x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000q\000-\000w\000]\000\000", "0H\000\000", 0, 2); + x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000f\000\000", 0, 2); + x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000g\000\000", 0, 2); + n("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\0002\000\000"); + x2("\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); + x2("\000.\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); + fprintf(stdout, + "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + +#ifndef POSIX_TEST + onig_region_free(region, 1); + onig_end(); +#endif + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +} diff --git a/ext/mbstring/oniguruma/win32/Makefile b/ext/mbstring/oniguruma/win32/Makefile new file mode 100644 index 0000000000..d08722baa9 --- /dev/null +++ b/ext/mbstring/oniguruma/win32/Makefile @@ -0,0 +1,200 @@ +# Oniguruma Makefile for Win32 + +product_name = oniguruma + +CPPFLAGS = +CFLAGS = -O2 -nologo /W3 +LDFLAGS = +LOADLIBES = +ARLIB = lib +ARLIB_FLAGS = -nologo +ARDLL = cl +ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll +LINKFLAGS = -link -incremental:no -pdb:none + +INSTALL = install -c +CP = copy +CC = cl +DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT +RUBYDIR = .. + +subdirs = + +libbase = onig +libname = $(libbase)_s.lib +dllname = $(libbase).dll +dlllib = $(libbase).lib + +onigheaders = oniguruma.h regint.h regparse.h regenc.h st.h +posixheaders = onigposix.h +headers = $(posixheaders) $(onigheaders) + +onigobjs = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \ + regexec.obj regenc.obj regsyntax.obj regtrav.obj \ + regversion.obj st.obj +posixobjs = regposix.obj regposerr.obj +libobjs = $(onigobjs) $(posixobjs) + +jp_objs = $(encdir)\euc_jp.obj $(encdir)\sjis.obj +iso8859_objs = $(encdir)\iso8859_1.obj $(encdir)\iso8859_2.obj \ + $(encdir)\iso8859_3.obj $(encdir)\iso8859_4.obj \ + $(encdir)\iso8859_5.obj $(encdir)\iso8859_6.obj \ + $(encdir)\iso8859_7.obj $(encdir)\iso8859_8.obj \ + $(encdir)\iso8859_9.obj $(encdir)\iso8859_10.obj \ + $(encdir)\iso8859_11.obj $(encdir)\iso8859_13.obj \ + $(encdir)\iso8859_14.obj $(encdir)\iso8859_15.obj \ + $(encdir)\iso8859_16.obj + +encobjs = $(encdir)\ascii.obj $(encdir)\utf8.obj \ + $(encdir)\unicode.obj \ + $(encdir)\utf16_be.obj $(encdir)\utf16_le.obj \ + $(encdir)\utf32_be.obj $(encdir)\utf32_le.obj \ + $(jp_objs) $(iso8859_objs) \ + $(encdir)\euc_tw.obj $(encdir)\euc_kr.obj $(encdir)\big5.obj \ + $(encdir)\gb18030.obj \ + $(encdir)\koi8_r.obj \ + $(encdir)\cp1251.obj # $(encdir)\koi8.obj + +onigsources = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \ + regsyntax.c regtrav.c regversion.c reggnu.c st.c +posixsources = regposix.c regposerr.c +libsources = $(posixsources) $(onigsources) +rubysources = $(onigsources) + +encdir = enc +patchfiles = re.c.168.patch re.c.181.patch +distfiles = README COPYING HISTORY \ + Makefile.in configure.in config.h.in configure \ + $(headers) $(libsources) $(patchfiles) \ + test.rb testconv.rb +testc = testc +testp = testp + +makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' + +.SUFFIXES: +.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo + +.c.obj: + $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $< + +# targets +default: all + +setup: + $(CP) win32\config.h config.h + $(CP) win32\testc.c testc.c + + +all: $(libname) $(dllname) + +$(libname): $(libobjs) $(encobjs) + $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs) + +$(dllname): $(libobjs) $(encobjs) + $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS) + +regparse.obj: regparse.c $(onigheaders) config.h st.h +regext.obj: regext.c $(onigheaders) config.h +regtrav.obj: regtrav.c $(onigheaders) config.h +regcomp.obj: regcomp.c $(onigheaders) config.h +regexec.obj: regexec.c regint.h regenc.h oniguruma.h config.h +reggnu.obj: reggnu.c regint.h regenc.h oniguruma.h config.h oniggnu.h +regerror.obj: regerror.c regint.h regenc.h oniguruma.h config.h +regenc.obj: regenc.c regenc.h oniguruma.h config.h +regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h +regversion.obj: regversion.c oniguruma.h config.h +regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h +regposerr.obj: regposerr.c $(posixheaders) config.h +st.obj: st.c regint.h oniguruma.h config.h st.h + +$(encdir)\ascii.obj: $(encdir)\ascii.c regenc.h config.h +$(encdir)\unicode.obj: $(encdir)\unicode.c regenc.h config.h +$(encdir)\utf8.obj: $(encdir)\utf8.c regenc.h config.h +$(encdir)\utf16_be.obj: $(encdir)\utf16_be.c regenc.h config.h +$(encdir)\utf16_le.obj: $(encdir)\utf16_le.c regenc.h config.h +$(encdir)\utf32_be.obj: $(encdir)\utf32_be.c regenc.h config.h +$(encdir)\utf32_le.obj: $(encdir)\utf32_le.c regenc.h config.h +$(encdir)\euc_jp.obj: $(encdir)\euc_jp.c regenc.h config.h +$(encdir)\euc_tw.obj: $(encdir)\euc_tw.c regenc.h config.h +$(encdir)\euc_kr.obj: $(encdir)\euc_kr.c regenc.h config.h +$(encdir)\sjis.obj: $(encdir)\sjis.c regenc.h config.h +$(encdir)\iso8859_1.obj: $(encdir)\iso8859_1.c regenc.h config.h +$(encdir)\iso8859_2.obj: $(encdir)\iso8859_2.c regenc.h config.h +$(encdir)\iso8859_3.obj: $(encdir)\iso8859_3.c regenc.h config.h +$(encdir)\iso8859_4.obj: $(encdir)\iso8859_4.c regenc.h config.h +$(encdir)\iso8859_5.obj: $(encdir)\iso8859_5.c regenc.h config.h +$(encdir)\iso8859_6.obj: $(encdir)\iso8859_6.c regenc.h config.h +$(encdir)\iso8859_7.obj: $(encdir)\iso8859_7.c regenc.h config.h +$(encdir)\iso8859_8.obj: $(encdir)\iso8859_8.c regenc.h config.h +$(encdir)\iso8859_9.obj: $(encdir)\iso8859_9.c regenc.h config.h +$(encdir)\iso8859_10.obj: $(encdir)\iso8859_10.c regenc.h config.h +$(encdir)\iso8859_11.obj: $(encdir)\iso8859_11.c regenc.h config.h +$(encdir)\iso8859_13.obj: $(encdir)\iso8859_13.c regenc.h config.h +$(encdir)\iso8859_14.obj: $(encdir)\iso8859_14.c regenc.h config.h +$(encdir)\iso8859_15.obj: $(encdir)\iso8859_15.c regenc.h config.h +$(encdir)\iso8859_16.obj: $(encdir)\iso8859_16.c regenc.h config.h +$(encdir)\koi8.obj: $(encdir)\koi8.c regenc.h config.h +$(encdir)\koi8_r.obj: $(encdir)\koi8_r.c regenc.h config.h +$(encdir)\cp1251.obj: $(encdir)\cp1251.c regenc.h config.h +$(encdir)\big5.obj: $(encdir)\big5.c regenc.h config.h +$(encdir)\gb18030.obj: $(encdir)\gb18030.c regenc.h config.h + + +# Ruby test +rtest: + $(RUBYDIR)\win32\ruby -w -Ke test.rb + +# C library test +ctest: $(testc) + .\$(testc) + +# POSIX C library test +ptest: $(testp) + .\$(testp) + +$(testc): $(testc).c $(libname) + $(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname) + +$(testp): $(testc).c $(dlllib) + $(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib) + +#$(testc)u.c: test.rb testconvu.rb +# ruby -Ke testconvu.rb test.rb > $@ + +$(testc)u: $(testc)u.c $(libname) + $(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) + +clean: + del *.obj $(encdir)\*.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj + + +# backup file suffix +SORIG = ruby_orig + +# ruby 1.9 source update +19: + $(CP) regerror.c $(RUBYDIR) + $(CP) regparse.c $(RUBYDIR) + $(CP) regcomp.c $(RUBYDIR) + $(CP) regexec.c $(RUBYDIR) + $(CP) regenc.c $(RUBYDIR) + $(CP) regint.h $(RUBYDIR) + $(CP) regparse.h $(RUBYDIR) + $(CP) regenc.h $(RUBYDIR) + $(CP) oniguruma.h $(RUBYDIR) + $(CP) enc\ascii.c $(RUBYDIR) + $(CP) enc\utf8.c $(RUBYDIR) + $(CP) enc\euc_jp.c $(RUBYDIR) + $(CP) enc\sjis.c $(RUBYDIR) + $(CP) enc\unicode.c $(RUBYDIR) + + +samples: all + $(CC) $(CFLAGS) -I. -o simple sample\simple.c $(dlllib) + $(CC) $(CFLAGS) -I. -o posix sample\posix.c $(dlllib) + $(CC) $(CFLAGS) -I. -o names sample\names.c $(dlllib) + $(CC) $(CFLAGS) -I. -o listcap sample\listcap.c $(dlllib) + $(CC) $(CFLAGS) -I. -o sql sample\sql.c $(dlllib) + $(CC) $(CFLAGS) -I. -o encode sample\encode.c $(dlllib) + $(CC) $(CFLAGS) -I. -o syntax sample\syntax.c $(dlllib) diff --git a/ext/mbstring/oniguruma/win32/testc.c b/ext/mbstring/oniguruma/win32/testc.c new file mode 100644 index 0000000000..acc13189d5 --- /dev/null +++ b/ext/mbstring/oniguruma/win32/testc.c @@ -0,0 +1,863 @@ +/* + * This program was generated by testconv.rb. + */ +#include "config.h" +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#include + +#ifdef POSIX_TEST +#include "onigposix.h" +#else +#include "oniguruma.h" +#endif + +#ifdef HAVE_STRING_H +# include +#else +# include +#endif + +#define SLEN(s) strlen(s) + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +#ifndef POSIX_TEST +static OnigRegion* region; +#endif + +static void xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + +#ifdef POSIX_TEST + regex_t reg; + char buf[200]; + regmatch_t pmatch[25]; + + r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); + nfail++; + } + } + } + regfree(®); + +#else + regex_t* reg; + OnigErrorInfo einfo; + + r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_SJIS, ONIG_SYNTAX_DEFAULT, &einfo); + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)), + (UChar* )str, (UChar* )(str + SLEN(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, region->beg[mem], region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +#endif +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ + err_file = stdout; + +#ifdef POSIX_TEST + reg_set_encoding(REG_POSIX_ENCODING_SJIS); +#else + region = onig_region_new(); +#endif + + x2("", "", 0, 0); + x2("^", "", 0, 0); + x2("$", "", 0, 0); + x2("\\G", "", 0, 0); + x2("\\A", "", 0, 0); + x2("\\Z", "", 0, 0); + x2("\\z", "", 0, 0); + x2("^$", "", 0, 0); + x2("\\ca", "\001", 0, 1); + x2("\\C-b", "\002", 0, 1); + x2("\\c\\\\", "\034", 0, 1); + x2("q[\\c\\\\]", "q\034", 0, 2); + x2("", "a", 0, 0); + x2("a", "a", 0, 1); + x2("\\x61", "a", 0, 1); + x2("aa", "aa", 0, 2); + x2("aaa", "aaa", 0, 3); + x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); + x2("ab", "ab", 0, 2); + x2("b", "ab", 1, 2); + x2("bc", "abc", 1, 3); + x2("(?i:#RET#)", "#INS##RET#", 5, 10); + x2("\\17", "\017", 0, 1); + x2("\\x1f", "\x1f", 0, 1); + x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); + x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); + x2(".", "a", 0, 1); + n(".", ""); + x2("..", "ab", 0, 2); + x2("\\w", "e", 0, 1); + n("\\W", "e"); + x2("\\s", " ", 0, 1); + x2("\\S", "b", 0, 1); + x2("\\d", "4", 0, 1); + n("\\D", "4"); + x2("\\b", "z ", 0, 0); + x2("\\b", " z", 1, 1); + x2("\\B", "zz ", 1, 1); + x2("\\B", "z ", 2, 2); + x2("\\B", " z", 0, 0); + x2("[ab]", "b", 0, 1); + n("[ab]", "c"); + x2("[a-z]", "t", 0, 1); + n("[^a]", "a"); + x2("[^a]", "\n", 0, 1); + x2("[]]", "]", 0, 1); + n("[^]]", "]"); + x2("[\\^]+", "0^^1", 1, 3); + x2("[b-]", "b", 0, 1); + x2("[b-]", "-", 0, 1); + x2("[\\w]", "z", 0, 1); + n("[\\w]", " "); + x2("[\\W]", "b$", 1, 2); + x2("[\\d]", "5", 0, 1); + n("[\\d]", "e"); + x2("[\\D]", "t", 0, 1); + n("[\\D]", "3"); + x2("[\\s]", " ", 0, 1); + n("[\\s]", "a"); + x2("[\\S]", "b", 0, 1); + n("[\\S]", " "); + x2("[\\w\\d]", "2", 0, 1); + n("[\\w\\d]", " "); + x2("[[:upper:]]", "B", 0, 1); + x2("[*[:xdigit:]+]", "+", 0, 1); + x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); + x2("[*[:xdigit:]+]", "-@^+", 3, 4); + n("[[:upper]]", "A"); + x2("[[:upper]]", ":", 0, 1); + x2("[\\044-\\047]", "\046", 0, 1); + x2("[\\x5a-\\x5c]", "\x5b", 0, 1); + x2("[\\x6A-\\x6D]", "\x6c", 0, 1); + n("[\\x6A-\\x6D]", "\x6E"); + n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); + x2("[\\[]", "[", 0, 1); + x2("[\\]]", "]", 0, 1); + x2("[&]", "&", 0, 1); + x2("[[ab]]", "b", 0, 1); + x2("[[ab]c]", "c", 0, 1); + n("[[^a]]", "a"); + n("[^[a]]", "a"); + x2("[[ab]&&bc]", "b", 0, 1); + n("[[ab]&&bc]", "a"); + n("[[ab]&&bc]", "c"); + x2("[a-z&&b-y&&c-x]", "w", 0, 1); + n("[^a-z&&b-y&&c-x]", "w"); + x2("[[^a&&a]&&a-z]", "b", 0, 1); + n("[[^a&&a]&&a-z]", "a"); + x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); + n("[[^a-z&&bcdef]&&[^c-g]]", "c"); + x2("[^[^abc]&&[^cde]]", "c", 0, 1); + x2("[^[^abc]&&[^cde]]", "e", 0, 1); + n("[^[^abc]&&[^cde]]", "f"); + x2("[a-&&-a]", "-", 0, 1); + n("[a\\-&&\\-a]", "&"); + n("\\wabc", " abc"); + x2("a\\Wbc", "a bc", 0, 4); + x2("a.b.c", "aabbc", 0, 5); + x2(".\\wb\\W..c", "abb bcc", 0, 7); + x2("\\s\\wzzz", " zzzz", 0, 5); + x2("aa.b", "aabb", 0, 4); + n(".a", "ab"); + x2(".a", "aa", 0, 2); + x2("^a", "a", 0, 1); + x2("^a$", "a", 0, 1); + x2("^\\w$", "a", 0, 1); + n("^\\w$", " "); + x2("^\\wab$", "zab", 0, 3); + x2("^\\wabcdef$", "zabcdef", 0, 7); + x2("^\\w...def$", "zabcdef", 0, 7); + x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); + x2("\\A\\Z", "", 0, 0); + x2("\\Axyz", "xyz", 0, 3); + x2("xyz\\Z", "xyz", 0, 3); + x2("xyz\\z", "xyz", 0, 3); + x2("a\\Z", "a", 0, 1); + x2("\\Gaz", "az", 0, 2); + n("\\Gz", "bza"); + n("az\\G", "az"); + n("az\\A", "az"); + n("a\\Az", "az"); + x2("\\^\\$", "^$", 0, 2); + x2("^x?y", "xy", 0, 2); + x2("^(x?y)", "xy", 0, 2); + x2("\\w", "_", 0, 1); + n("\\W", "_"); + x2("(?=z)z", "z", 0, 1); + n("(?=z).", "a"); + x2("(?!z)a", "a", 0, 1); + n("(?!z)a", "z"); + x2("(?i:a)", "a", 0, 1); + x2("(?i:a)", "A", 0, 1); + x2("(?i:A)", "a", 0, 1); + n("(?i:A)", "b"); + x2("(?i:[A-Z])", "a", 0, 1); + x2("(?i:[f-m])", "H", 0, 1); + x2("(?i:[f-m])", "h", 0, 1); + n("(?i:[f-m])", "e"); + x2("(?i:[A-c])", "D", 0, 1); + n("(?i:[^a-z])", "A"); + n("(?i:[^a-z])", "a"); + x2("(?i:[!-k])", "Z", 0, 1); + x2("(?i:[!-k])", "7", 0, 1); + x2("(?i:[T-}])", "b", 0, 1); + x2("(?i:[T-}])", "{", 0, 1); + x2("(?i:\\?a)", "?A", 0, 2); + x2("(?i:\\*A)", "*a", 0, 2); + n(".", "\n"); + x2("(?m:.)", "\n", 0, 1); + x2("(?m:a.)", "a\n", 0, 2); + x2("(?m:.b)", "a\nb", 1, 3); + x2(".*abc", "dddabdd\nddabc", 8, 13); + x2("(?m:.*abc)", "dddabddabc", 0, 10); + n("(?i)(?-i)a", "A"); + n("(?i)(?-i:a)", "A"); + x2("a?", "", 0, 0); + x2("a?", "b", 0, 0); + x2("a?", "a", 0, 1); + x2("a*", "", 0, 0); + x2("a*", "a", 0, 1); + x2("a*", "aaa", 0, 3); + x2("a*", "baaaa", 0, 0); + n("a+", ""); + x2("a+", "a", 0, 1); + x2("a+", "aaaa", 0, 4); + x2("a+", "aabbb", 0, 2); + x2("a+", "baaaa", 1, 5); + x2(".?", "", 0, 0); + x2(".?", "f", 0, 1); + x2(".?", "\n", 0, 0); + x2(".*", "", 0, 0); + x2(".*", "abcde", 0, 5); + x2(".+", "z", 0, 1); + x2(".+", "zdswer\n", 0, 6); + x2("(.*)a\\1f", "babfbac", 0, 4); + x2("(.*)a\\1f", "bacbabf", 3, 7); + x2("((.*)a\\2f)", "bacbabf", 3, 7); + x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); + x2("a|b", "a", 0, 1); + x2("a|b", "b", 0, 1); + x2("|a", "a", 0, 0); + x2("(|a)", "a", 0, 0); + x2("ab|bc", "ab", 0, 2); + x2("ab|bc", "bc", 0, 2); + x2("z(?:ab|bc)", "zbc", 0, 3); + x2("a(?:ab|bc)c", "aabc", 0, 4); + x2("ab|(?:ac|az)", "az", 0, 2); + x2("a|b|c", "dc", 1, 2); + x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); + n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); + x2("a|^z", "ba", 1, 2); + x2("a|^z", "za", 0, 1); + x2("a|\\Gz", "bza", 2, 3); + x2("a|\\Gz", "za", 0, 1); + x2("a|\\Az", "bza", 2, 3); + x2("a|\\Az", "za", 0, 1); + x2("a|b\\Z", "ba", 1, 2); + x2("a|b\\Z", "b", 0, 1); + x2("a|b\\z", "ba", 1, 2); + x2("a|b\\z", "b", 0, 1); + x2("\\w|\\s", " ", 0, 1); + n("\\w|\\w", " "); + x2("\\w|%", "%", 0, 1); + x2("\\w|[&$]", "&", 0, 1); + x2("[b-d]|[^e-z]", "a", 0, 1); + x2("(?:a|[c-f])|bz", "dz", 0, 1); + x2("(?:a|[c-f])|bz", "bz", 0, 2); + x2("abc|(?=zz)..f", "zzf", 0, 3); + x2("abc|(?!zz)..f", "abf", 0, 3); + x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); + n("(?>a|abd)c", "abdc"); + x2("(?>abd|a)c", "abdc", 0, 4); + x2("a?|b", "a", 0, 1); + x2("a?|b", "b", 0, 0); + x2("a?|b", "", 0, 0); + x2("a*|b", "aa", 0, 2); + x2("a*|b*", "ba", 0, 0); + x2("a*|b*", "ab", 0, 1); + x2("a+|b*", "", 0, 0); + x2("a+|b*", "bbb", 0, 3); + x2("a+|b*", "abbb", 0, 1); + n("a+|b+", ""); + x2("(a|b)?", "b", 0, 1); + x2("(a|b)*", "ba", 0, 2); + x2("(a|b)+", "bab", 0, 3); + x2("(ab|ca)+", "caabbc", 0, 4); + x2("(ab|ca)+", "aabca", 1, 5); + x2("(ab|ca)+", "abzca", 0, 2); + x2("(a|bab)+", "ababa", 0, 5); + x2("(a|bab)+", "ba", 1, 2); + x2("(a|bab)+", "baaaba", 1, 4); + x2("(?:a|b)(?:a|b)", "ab", 0, 2); + x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); + x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); + x2("(?:a+|b+){2}", "aaabbb", 0, 6); + x2("h{0,}", "hhhh", 0, 4); + x2("(?:a+|b+){1,2}", "aaabbb", 0, 6); + n("ax{2}*a", "0axxxa1"); + n("a.{0,2}a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXXa0"); + x2("^a{2,}?a$", "aaa", 0, 3); + x2("^[a-z]{2,}?$", "aaa", 0, 3); + x2("(?:a+|\\Ab*)cc", "cc", 0, 2); + n("(?:a+|\\Ab*)cc", "abcc"); + x2("(?:^a+|b+)*c", "aabbbabc", 6, 8); + x2("(?:^a+|b+)*c", "aabbbbc", 0, 7); + x2("a|(?i)c", "C", 0, 1); + x2("(?i)c|a", "C", 0, 1); + x2("(?i)c|a", "A", 0, 1); + x2("(?i:c)|a", "C", 0, 1); + n("(?i:c)|a", "A"); + x2("[abc]?", "abc", 0, 1); + x2("[abc]*", "abc", 0, 3); + x2("[^abc]*", "abc", 0, 0); + n("[^abc]+", "abc"); + x2("a?\?", "aaa", 0, 0); + x2("ba?\?b", "bab", 0, 3); + x2("a*?", "aaa", 0, 0); + x2("ba*?", "baa", 0, 1); + x2("ba*?b", "baab", 0, 4); + x2("a+?", "aaa", 0, 1); + x2("ba+?", "baa", 0, 2); + x2("ba+?b", "baab", 0, 4); + x2("(?:a?)?\?", "a", 0, 0); + x2("(?:a?\?)?", "a", 0, 0); + x2("(?:a?)+?", "aaa", 0, 1); + x2("(?:a+)?\?", "aaa", 0, 0); + x2("(?:a+)?\?b", "aaab", 0, 4); + x2("(?:ab)?{2}", "", 0, 0); + x2("(?:ab)?{2}", "ababa", 0, 4); + x2("(?:ab)*{0}", "ababa", 0, 0); + x2("(?:ab){3,}", "abababab", 0, 8); + n("(?:ab){3,}", "abab"); + x2("(?:ab){2,4}", "ababab", 0, 6); + x2("(?:ab){2,4}", "ababababab", 0, 8); + x2("(?:ab){2,4}?", "ababababab", 0, 4); + x2("(?:ab){,}", "ab{,}", 0, 5); + x2("(?:abc)+?{2}", "abcabcabc", 0, 6); + x2("(?:X*)(?i:xa)", "XXXa", 0, 4); + x2("(d+)([^abc]z)", "dddz", 0, 4); + x2("([^abc]*)([^abc]z)", "dddz", 0, 4); + x2("(\\w+)(\\wz)", "dddz", 0, 4); + x3("(a)", "a", 0, 1, 1); + x3("(ab)", "ab", 0, 2, 1); + x2("((ab))", "ab", 0, 2); + x3("((ab))", "ab", 0, 2, 1); + x3("((ab))", "ab", 0, 2, 2); + x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); + x3("(ab)(cd)", "abcd", 0, 2, 1); + x3("(ab)(cd)", "abcd", 2, 4, 2); + x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); + x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); + x2("(^a)", "a", 0, 1); + x3("(a)|(a)", "ba", 1, 2, 1); + x3("(^a)|(a)", "ba", 1, 2, 2); + x3("(a?)", "aaa", 0, 1, 1); + x3("(a*)", "aaa", 0, 3, 1); + x3("(a*)", "", 0, 0, 1); + x3("(a+)", "aaaaaaa", 0, 7, 1); + x3("(a+|b*)", "bbbaa", 0, 3, 1); + x3("(a+|b?)", "bbbaa", 0, 1, 1); + x3("(abc)?", "abc", 0, 3, 1); + x3("(abc)*", "abc", 0, 3, 1); + x3("(abc)+", "abc", 0, 3, 1); + x3("(xyz|abc)+", "abc", 0, 3, 1); + x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); + x3("((?i:abc))", "AbC", 0, 3, 1); + x2("(abc)(?i:\\1)", "abcABC", 0, 6); + x3("((?m:a.c))", "a\nc", 0, 3, 1); + x3("((?=az)a)", "azb", 0, 1, 1); + x3("abc|(.abd)", "zabd", 0, 4, 1); + x2("(?:abc)|(ABC)", "abc", 0, 3); + x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); + x3("a*(.)", "aaaaz", 4, 5, 1); + x3("a*?(.)", "aaaaz", 0, 1, 1); + x3("a*?(c)", "aaaac", 4, 5, 1); + x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); + x3("(\\Abb)cc", "bbcc", 0, 2, 1); + n("(\\Abb)cc", "zbbcc"); + x3("(^bb)cc", "bbcc", 0, 2, 1); + n("(^bb)cc", "zbbcc"); + x3("cc(bb$)", "ccbb", 2, 4, 1); + n("cc(bb$)", "ccbbb"); + n("(\\1)", ""); + n("\\1(a)", "aa"); + n("(a(b)\\1)\\2+", "ababb"); + n("(?:(?:\\1|z)(a))+$", "zaa"); + x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); + x2("(a)(?=\\1)", "aa", 0, 1); + n("(a)$|\\1", "az"); + x2("(a)\\1", "aa", 0, 2); + n("(a)\\1", "ab"); + x2("(a?)\\1", "aa", 0, 2); + x2("(a?\?)\\1", "aa", 0, 0); + x2("(a*)\\1", "aaaaa", 0, 4); + x3("(a*)\\1", "aaaaa", 0, 2, 1); + x2("a(b*)\\1", "abbbb", 0, 5); + x2("a(b*)\\1", "ab", 0, 1); + x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); + x2("(a*)(b*)\\2", "aaabbbb", 0, 7); + x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); + x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); + x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); + x2("([a-d])\\1", "cc", 0, 2); + x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); + n("(\\w\\d\\s)\\1", "f5 f5"); + x2("(who|[a-c]{3})\\1", "whowho", 0, 6); + x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); + x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); + x2("(^a)\\1", "aa", 0, 2); + n("(^a)\\1", "baa"); + n("(a$)\\1", "aa"); + n("(ab\\Z)\\1", "ab"); + x2("(a*\\Z)\\1", "a", 1, 1); + x2(".(a*\\Z)\\1", "ba", 1, 2); + x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); + x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); + x2("((?i:az))\\1", "AzAz", 0, 4); + n("((?i:az))\\1", "Azaz"); + x2("(?<=a)b", "ab", 1, 2); + n("(?<=a)b", "bb"); + x2("(?<=a|b)b", "bb", 1, 2); + x2("(?<=a|bc)b", "bcb", 2, 3); + x2("(?<=a|bc)b", "ab", 1, 2); + x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); + x2("(a)\\g<1>", "aa", 0, 2); + x2("(?a)", "a", 0, 1); + x2("(?ab)\\g", "abab", 0, 4); + x2("(?.zv.)\\k", "azvbazvb", 0, 8); + x2("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); + x2("(?|a\\g)+", "", 0, 0); + x2("(?|\\(\\g\\))+$", "()(())", 0, 6); + x3("\\g(?.){0}", "X", 0, 1, 1); + x2("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); + x2("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); + x2("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); + x2("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18); + x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); + x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); + x2("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); + x2("(?:(?)|(?efg))\\k", "", 0, 0); + x2("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); + n("(?:(?abc)|(?efg))\\k", "abcefg"); + x2("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); + x3("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); + x3("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); + x2("(?a|\\(\\g\\))", "a", 0, 1); + x2("(?a|\\(\\g\\))", "((((((a))))))", 0, 13); + x3("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); + x2("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); + x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); + x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); + x2("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); + x2("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); + x2("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); + x2("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); + x2("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); + x2("()*\\1", "", 0, 0); + x2("(?:()|())*\\1\\2", "", 0, 0); + x3("(?:\\1a|())*", "a", 0, 0, 1); + x2("x((.)*)*x", "0x1x2x3", 1, 6); + x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); + x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); + x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); + x2("\\xED\\xF2", "\xed\xf2", 0, 2); + x2("", "‚ ", 0, 0); + x2("‚ ", "‚ ", 0, 2); + n("‚¢", "‚ "); + x2("‚¤‚¤", "‚¤‚¤", 0, 4); + x2("‚ ‚¢‚¤", "‚ ‚¢‚¤", 0, 6); + x2("‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±", "‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±", 0, 70); + x2("‚ ", "‚¢‚ ", 2, 4); + x2("‚¢‚¤", "‚ ‚¢‚¤", 2, 6); + x2("\\xca\\xb8", "\xca\xb8", 0, 2); + x2(".", "‚ ", 0, 2); + x2("..", "‚©‚«", 0, 4); + x2("\\w", "‚¨", 0, 2); + n("\\W", "‚ "); + x2("[\\W]", "‚¤$", 2, 3); + x2("\\S", "‚»", 0, 2); + x2("\\S", "Š¿", 0, 2); + x2("\\b", "‹C ", 0, 0); + x2("\\b", " ‚Ù", 1, 1); + x2("\\B", "‚¹‚» ", 2, 2); + x2("\\B", "‚¤ ", 3, 3); + x2("\\B", " ‚¢", 0, 0); + x2("[‚½‚¿]", "‚¿", 0, 2); + n("[‚È‚É]", "‚Ê"); + x2("[‚¤-‚¨]", "‚¦", 0, 2); + n("[^‚¯]", "‚¯"); + x2("[\\w]", "‚Ë", 0, 2); + n("[\\d]", "‚Ó"); + x2("[\\D]", "‚Í", 0, 2); + n("[\\s]", "‚­"); + x2("[\\S]", "‚Ö", 0, 2); + x2("[\\w\\d]", "‚æ", 0, 2); + x2("[\\w\\d]", " ‚æ", 3, 5); + n("\\w‹SŽÔ", " ‹SŽÔ"); + x2("‹S\\WŽÔ", "‹S ŽÔ", 0, 5); + x2("‚ .‚¢.‚¤", "‚ ‚ ‚¢‚¢‚¤", 0, 10); + x2(".\\w‚¤\\W..‚¼", "‚¦‚¤‚¤ ‚¤‚¼‚¼", 0, 13); + x2("\\s\\w‚±‚±‚±", " ‚±‚±‚±‚±", 0, 9); + x2("‚ ‚ .‚¯", "‚ ‚ ‚¯‚¯", 0, 8); + n(".‚¢", "‚¢‚¦"); + x2(".‚¨", "‚¨‚¨", 0, 4); + x2("^‚ ", "‚ ", 0, 2); + x2("^‚Þ$", "‚Þ", 0, 2); + x2("^\\w$", "‚É", 0, 2); + x2("^\\w‚©‚«‚­‚¯‚±$", "z‚©‚«‚­‚¯‚±", 0, 11); + x2("^\\w...‚¤‚¦‚¨$", "z‚ ‚¢‚¤‚¤‚¦‚¨", 0, 13); + x2("\\w\\w\\s\\W‚¨‚¨‚¨\\d", "a‚¨ ‚¨‚¨‚¨4", 0, 12); + x2("\\A‚½‚¿‚Â", "‚½‚¿‚Â", 0, 6); + x2("‚Þ‚ß‚à\\Z", "‚Þ‚ß‚à", 0, 6); + x2("‚©‚«‚­\\z", "‚©‚«‚­", 0, 6); + x2("‚©‚«‚­\\Z", "‚©‚«‚­\n", 0, 6); + x2("\\G‚Û‚Ò", "‚Û‚Ò", 0, 4); + n("\\G‚¦", "‚¤‚¦‚¨"); + n("‚Æ‚Ä\\G", "‚Æ‚Ä"); + n("‚Ü‚Ý\\A", "‚Ü‚Ý"); + n("‚Ü\\A‚Ý", "‚Ü‚Ý"); + x2("(?=‚¹)‚¹", "‚¹", 0, 2); + n("(?=‚¤).", "‚¢"); + x2("(?!‚¤)‚©", "‚©", 0, 2); + n("(?!‚Æ)‚ ", "‚Æ"); + x2("(?i:‚ )", "‚ ", 0, 2); + x2("(?i:‚Ô‚×)", "‚Ô‚×", 0, 4); + n("(?i:‚¢)", "‚¤"); + x2("(?m:‚æ.)", "‚æ\n", 0, 3); + x2("(?m:.‚ß)", "‚Ü\n‚ß", 2, 5); + x2("‚ ?", "", 0, 0); + x2("•Ï?", "‰»", 0, 0); + x2("•Ï?", "•Ï", 0, 2); + x2("—Ê*", "", 0, 0); + x2("—Ê*", "—Ê", 0, 2); + x2("Žq*", "ŽqŽqŽq", 0, 6); + x2("”n*", "Ž­”n”n”n”n", 0, 0); + n("ŽR+", ""); + x2("‰Í+", "‰Í", 0, 2); + x2("Žž+", "ŽžŽžŽžŽž", 0, 8); + x2("‚¦+", "‚¦‚¦‚¤‚¤‚¤", 0, 4); + x2("‚¤+", "‚¨‚¤‚¤‚¤‚¤", 2, 10); + x2(".?", "‚½", 0, 2); + x2(".*", "‚ς҂ՂØ", 0, 8); + x2(".+", "‚ë", 0, 2); + x2(".+", "‚¢‚¤‚¦‚©\n", 0, 8); + x2("‚ |‚¢", "‚ ", 0, 2); + x2("‚ |‚¢", "‚¢", 0, 2); + x2("‚ ‚¢|‚¢‚¤", "‚ ‚¢", 0, 4); + x2("‚ ‚¢|‚¢‚¤", "‚¢‚¤", 0, 4); + x2("‚ð(?:‚©‚«|‚«‚­)", "‚ð‚©‚«", 0, 6); + x2("‚ð(?:‚©‚«|‚«‚­)‚¯", "‚ð‚«‚­‚¯", 0, 8); + x2("‚ ‚¢|(?:‚ ‚¤|‚ ‚ð)", "‚ ‚ð", 0, 4); + x2("‚ |‚¢|‚¤", "‚¦‚¤", 2, 4); + x2("‚ |‚¢|‚¤‚¦|‚¨‚©‚«|‚­|‚¯‚±‚³|‚µ‚·‚¹|‚»|‚½‚¿|‚‚ĂƂȂÉ|‚Ê‚Ë", "‚µ‚·‚¹", 0, 6); + n("‚ |‚¢|‚¤‚¦|‚¨‚©‚«|‚­|‚¯‚±‚³|‚µ‚·‚¹|‚»|‚½‚¿|‚‚ĂƂȂÉ|‚Ê‚Ë", "‚·‚¹"); + x2("‚ |^‚í", "‚Ô‚ ", 2, 4); + x2("‚ |^‚ð", "‚ð‚ ", 0, 2); + x2("‹S|\\GŽÔ", "‚¯ŽÔ‹S", 4, 6); + x2("‹S|\\GŽÔ", "ŽÔ‹S", 0, 2); + x2("‹S|\\AŽÔ", "bŽÔ‹S", 3, 5); + x2("‹S|\\AŽÔ", "ŽÔ", 0, 2); + x2("‹S|ŽÔ\\Z", "ŽÔ‹S", 2, 4); + x2("‹S|ŽÔ\\Z", "ŽÔ", 0, 2); + x2("‹S|ŽÔ\\Z", "ŽÔ\n", 0, 2); + x2("‹S|ŽÔ\\z", "ŽÔ‹S", 2, 4); + x2("‹S|ŽÔ\\z", "ŽÔ", 0, 2); + x2("\\w|\\s", "‚¨", 0, 2); + x2("\\w|%", "%‚¨", 0, 1); + x2("\\w|[&$]", "‚¤&", 0, 2); + x2("[‚¢-‚¯]", "‚¤", 0, 2); + x2("[‚¢-‚¯]|[^‚©-‚±]", "‚ ", 0, 2); + x2("[‚¢-‚¯]|[^‚©-‚±]", "‚©", 0, 2); + x2("[^‚ ]", "\n", 0, 1); + x2("(?:‚ |[‚¤-‚«])|‚¢‚ð", "‚¤‚ð", 0, 2); + x2("(?:‚ |[‚¤-‚«])|‚¢‚ð", "‚¢‚ð", 0, 4); + x2("‚ ‚¢‚¤|(?=‚¯‚¯)..‚Ù", "‚¯‚¯‚Ù", 0, 6); + x2("‚ ‚¢‚¤|(?!‚¯‚¯)..‚Ù", "‚ ‚¢‚Ù", 0, 6); + x2("(?=‚ð‚ )..‚ |(?=‚ð‚ð)..‚ ", "‚ð‚ð‚ ", 0, 6); + x2("(?<=‚ |‚¢‚¤)‚¢", "‚¢‚¤‚¢", 4, 6); + n("(?>‚ |‚ ‚¢‚¦)‚¤", "‚ ‚¢‚¦‚¤"); + x2("(?>‚ ‚¢‚¦|‚ )‚¤", "‚ ‚¢‚¦‚¤", 0, 8); + x2("‚ ?|‚¢", "‚ ", 0, 2); + x2("‚ ?|‚¢", "‚¢", 0, 0); + x2("‚ ?|‚¢", "", 0, 0); + x2("‚ *|‚¢", "‚ ‚ ", 0, 4); + x2("‚ *|‚¢*", "‚¢‚ ", 0, 0); + x2("‚ *|‚¢*", "‚ ‚¢", 0, 2); + x2("[a‚ ]*|‚¢*", "a‚ ‚¢‚¢‚¢", 0, 3); + x2("‚ +|‚¢*", "", 0, 0); + x2("‚ +|‚¢*", "‚¢‚¢‚¢", 0, 6); + x2("‚ +|‚¢*", "‚ ‚¢‚¢‚¢", 0, 2); + x2("‚ +|‚¢*", "a‚ ‚¢‚¢‚¢", 0, 0); + n("‚ +|‚¢+", ""); + x2("(‚ |‚¢)?", "‚¢", 0, 2); + x2("(‚ |‚¢)*", "‚¢‚ ", 0, 4); + x2("(‚ |‚¢)+", "‚¢‚ ‚¢", 0, 6); + x2("(‚ ‚¢|‚¤‚ )+", "‚¤‚ ‚ ‚¢‚¤‚¦", 0, 8); + x2("(‚ ‚¢|‚¤‚¦)+", "‚¤‚ ‚ ‚¢‚¤‚¦", 4, 12); + x2("(‚ ‚¢|‚¤‚ )+", "‚ ‚ ‚¢‚¤‚ ", 2, 10); + x2("(‚ ‚¢|‚¤‚ )+", "‚ ‚¢‚ð‚¤‚ ", 0, 4); + x2("(‚ ‚¢|‚¤‚ )+", "$$zzzz‚ ‚¢‚ð‚¤‚ ", 6, 10); + x2("(‚ |‚¢‚ ‚¢)+", "‚ ‚¢‚ ‚¢‚ ", 0, 10); + x2("(‚ |‚¢‚ ‚¢)+", "‚¢‚ ", 2, 4); + x2("(‚ |‚¢‚ ‚¢)+", "‚¢‚ ‚ ‚ ‚¢‚ ", 2, 8); + x2("(?:‚ |‚¢)(?:‚ |‚¢)", "‚ ‚¢", 0, 4); + x2("(?:‚ *|‚¢*)(?:‚ *|‚¢*)", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 6); + x2("(?:‚ *|‚¢*)(?:‚ +|‚¢+)", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12); + x2("(?:‚ +|‚¢+){2}", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12); + x2("(?:‚ +|‚¢+){1,2}", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12); + x2("(?:‚ +|\\A‚¢*)‚¤‚¤", "‚¤‚¤", 0, 4); + n("(?:‚ +|\\A‚¢*)‚¤‚¤", "‚ ‚¢‚¤‚¤"); + x2("(?:^‚ +|‚¢+)*‚¤", "‚ ‚ ‚¢‚¢‚¢‚ ‚¢‚¤", 12, 16); + x2("(?:^‚ +|‚¢+)*‚¤", "‚ ‚ ‚¢‚¢‚¢‚¢‚¤", 0, 14); + x2("‚¤{0,}", "‚¤‚¤‚¤‚¤", 0, 8); + x2("‚ |(?i)c", "C", 0, 1); + x2("(?i)c|‚ ", "C", 0, 1); + x2("(?i:‚ )|a", "a", 0, 1); + n("(?i:‚ )|a", "A"); + x2("[‚ ‚¢‚¤]?", "‚ ‚¢‚¤", 0, 2); + x2("[‚ ‚¢‚¤]*", "‚ ‚¢‚¤", 0, 6); + x2("[^‚ ‚¢‚¤]*", "‚ ‚¢‚¤", 0, 0); + n("[^‚ ‚¢‚¤]+", "‚ ‚¢‚¤"); + x2("‚ ?\?", "‚ ‚ ‚ ", 0, 0); + x2("‚¢‚ ?\?‚¢", "‚¢‚ ‚¢", 0, 6); + x2("‚ *?", "‚ ‚ ‚ ", 0, 0); + x2("‚¢‚ *?", "‚¢‚ ‚ ", 0, 2); + x2("‚¢‚ *?‚¢", "‚¢‚ ‚ ‚¢", 0, 8); + x2("‚ +?", "‚ ‚ ‚ ", 0, 2); + x2("‚¢‚ +?", "‚¢‚ ‚ ", 0, 4); + x2("‚¢‚ +?‚¢", "‚¢‚ ‚ ‚¢", 0, 8); + x2("(?:“V?)?\?", "“V", 0, 0); + x2("(?:“V?\?)?", "“V", 0, 0); + x2("(?:–²?)+?", "–²–²–²", 0, 2); + x2("(?:•—+)?\?", "•—•—•—", 0, 0); + x2("(?:á+)?\?‘š", "ááá‘š", 0, 8); + x2("(?:‚ ‚¢)?{2}", "", 0, 0); + x2("(?:‹SŽÔ)?{2}", "‹SŽÔ‹SŽÔ‹S", 0, 8); + x2("(?:‹SŽÔ)*{0}", "‹SŽÔ‹SŽÔ‹S", 0, 0); + x2("(?:‹SŽÔ){3,}", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 16); + n("(?:‹SŽÔ){3,}", "‹SŽÔ‹SŽÔ"); + x2("(?:‹SŽÔ){2,4}", "‹SŽÔ‹SŽÔ‹SŽÔ", 0, 12); + x2("(?:‹SŽÔ){2,4}", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 16); + x2("(?:‹SŽÔ){2,4}?", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 8); + x2("(?:‹SŽÔ){,}", "‹SŽÔ{,}", 0, 7); + x2("(?:‚©‚«‚­)+?{2}", "‚©‚«‚­‚©‚«‚­‚©‚«‚­", 0, 12); + x3("(‰Î)", "‰Î", 0, 2, 1); + x3("(‰Î…)", "‰Î…", 0, 4, 1); + x2("((ŽžŠÔ))", "ŽžŠÔ", 0, 4); + x3("((•—…))", "•—…", 0, 4, 1); + x3("((ð“ú))", "ð“ú", 0, 4, 2); + x3("((((((((((((((((((((—ÊŽq))))))))))))))))))))", "—ÊŽq", 0, 4, 20); + x3("(‚ ‚¢)(‚¤‚¦)", "‚ ‚¢‚¤‚¦", 0, 4, 1); + x3("(‚ ‚¢)(‚¤‚¦)", "‚ ‚¢‚¤‚¦", 4, 8, 2); + x3("()(‚ )‚¢‚¤(‚¦‚¨‚©)‚«‚­‚¯‚±", "‚ ‚¢‚¤‚¦‚¨‚©‚«‚­‚¯‚±", 6, 12, 3); + x3("(()(‚ )‚¢‚¤(‚¦‚¨‚©)‚«‚­‚¯‚±)", "‚ ‚¢‚¤‚¦‚¨‚©‚«‚­‚¯‚±", 6, 12, 4); + x3(".*(ƒtƒH)ƒ“Eƒ}(ƒ“()ƒVƒ…ƒ^)ƒCƒ“", "ƒtƒHƒ“Eƒ}ƒ“ƒVƒ…ƒ^ƒCƒ“", 10, 18, 2); + x2("(^‚ )", "‚ ", 0, 2); + x3("(‚ )|(‚ )", "‚¢‚ ", 2, 4, 1); + x3("(^‚ )|(‚ )", "‚¢‚ ", 2, 4, 2); + x3("(‚ ?)", "‚ ‚ ‚ ", 0, 2, 1); + x3("(‚Ü*)", "‚܂܂Ü", 0, 6, 1); + x3("(‚Æ*)", "", 0, 0, 1); + x3("(‚é+)", "‚é‚é‚é‚é‚é‚é‚é", 0, 14, 1); + x3("(‚Ó+|‚Ö*)", "‚Ó‚Ó‚Ó‚Ö‚Ö", 0, 6, 1); + x3("(‚ +|‚¢?)", "‚¢‚¢‚¢‚ ‚ ", 0, 2, 1); + x3("(‚ ‚¢‚¤)?", "‚ ‚¢‚¤", 0, 6, 1); + x3("(‚ ‚¢‚¤)*", "‚ ‚¢‚¤", 0, 6, 1); + x3("(‚ ‚¢‚¤)+", "‚ ‚¢‚¤", 0, 6, 1); + x3("(‚³‚µ‚·|‚ ‚¢‚¤)+", "‚ ‚¢‚¤", 0, 6, 1); + x3("([‚ȂɂÊ][‚©‚«‚­]|‚©‚«‚­)+", "‚©‚«‚­", 0, 6, 1); + x3("((?i:‚ ‚¢‚¤))", "‚ ‚¢‚¤", 0, 6, 1); + x3("((?m:‚ .‚¤))", "‚ \n‚¤", 0, 5, 1); + x3("((?=‚ ‚ñ)‚ )", "‚ ‚ñ‚¢", 0, 2, 1); + x3("‚ ‚¢‚¤|(.‚ ‚¢‚¦)", "‚ñ‚ ‚¢‚¦", 0, 8, 1); + x3("‚ *(.)", "‚ ‚ ‚ ‚ ‚ñ", 8, 10, 1); + x3("‚ *?(.)", "‚ ‚ ‚ ‚ ‚ñ", 0, 2, 1); + x3("‚ *?(‚ñ)", "‚ ‚ ‚ ‚ ‚ñ", 8, 10, 1); + x3("[‚¢‚¤‚¦]‚ *(.)", "‚¦‚ ‚ ‚ ‚ ‚ñ", 10, 12, 1); + x3("(\\A‚¢‚¢)‚¤‚¤", "‚¢‚¢‚¤‚¤", 0, 4, 1); + n("(\\A‚¢‚¢)‚¤‚¤", "‚ñ‚¢‚¢‚¤‚¤"); + x3("(^‚¢‚¢)‚¤‚¤", "‚¢‚¢‚¤‚¤", 0, 4, 1); + n("(^‚¢‚¢)‚¤‚¤", "‚ñ‚¢‚¢‚¤‚¤"); + x3("‚ë‚ë(‚é‚é$)", "‚ë‚ë‚é‚é", 4, 8, 1); + n("‚ë‚ë(‚é‚é$)", "‚ë‚ë‚é‚é‚é"); + x2("(–³)\\1", "–³–³", 0, 4); + n("(–³)\\1", "–³•"); + x2("(‹ó?)\\1", "‹ó‹ó", 0, 4); + x2("(‹ó?\?)\\1", "‹ó‹ó", 0, 0); + x2("(‹ó*)\\1", "‹ó‹ó‹ó‹ó‹ó", 0, 8); + x3("(‹ó*)\\1", "‹ó‹ó‹ó‹ó‹ó", 0, 4, 1); + x2("‚ (‚¢*)\\1", "‚ ‚¢‚¢‚¢‚¢", 0, 10); + x2("‚ (‚¢*)\\1", "‚ ‚¢", 0, 2); + x2("(‚ *)(‚¢*)\\1\\2", "‚ ‚ ‚ ‚¢‚¢‚ ‚ ‚ ‚¢‚¢", 0, 20); + x2("(‚ *)(‚¢*)\\2", "‚ ‚ ‚ ‚¢‚¢‚¢‚¢", 0, 14); + x3("(‚ *)(‚¢*)\\2", "‚ ‚ ‚ ‚¢‚¢‚¢‚¢", 6, 10, 2); + x2("(((((((‚Û*)‚Ø))))))‚Ò\\7", "‚ۂۂۂ؂҂ۂۂÛ", 0, 16); + x3("(((((((‚Û*)‚Ø))))))‚Ò\\7", "‚ۂۂۂ؂҂ۂۂÛ", 0, 6, 7); + x2("(‚Í)(‚Ð)(‚Ó)\\2\\1\\3", "‚͂ЂӂЂ͂Ó", 0, 12); + x2("([‚«-‚¯])\\1", "‚­‚­", 0, 4); + x2("(\\w\\d\\s)\\1", "‚ 5 ‚ 5 ", 0, 8); + n("(\\w\\d\\s)\\1", "‚ 5 ‚ 5"); + x2("(’NH|[‚ -‚¤]{3})\\1", "’NH’NH", 0, 8); + x2("...(’NH|[‚ -‚¤]{3})\\1", "‚ a‚ ’NH’NH", 0, 13); + x2("(’NH|[‚ -‚¤]{3})\\1", "‚¤‚¢‚¤‚¤‚¢‚¤", 0, 12); + x2("(^‚±)\\1", "‚±‚±", 0, 4); + n("(^‚Þ)\\1", "‚߂ނÞ"); + n("(‚ $)\\1", "‚ ‚ "); + n("(‚ ‚¢\\Z)\\1", "‚ ‚¢"); + x2("(‚ *\\Z)\\1", "‚ ", 2, 2); + x2(".(‚ *\\Z)\\1", "‚¢‚ ", 2, 4); + x3("(.(‚â‚¢‚ä)\\2)", "z‚â‚¢‚ä‚â‚¢‚ä", 0, 13, 1); + x3("(.(..\\d.)\\2)", "‚ 12341234", 0, 10, 1); + x2("((?i:‚ v‚¸))\\1", "‚ v‚¸‚ v‚¸", 0, 10); + x2("(?<‹ð‚©>•Ï|\\(\\g<‹ð‚©>\\))", "((((((•Ï))))))", 0, 14); + x2("\\A(?:\\g<ˆ¢_1>|\\g<‰]_2>|\\zI—¹ (?<ˆ¢_1>ŠÏ|Ž©\\g<‰]_2>Ž©)(?<‰]_2>Ý|•ìŽF\\g<ˆ¢_1>•ìŽF))$", "•ìŽFŽ©•ìŽFŽ©ÝŽ©•ìŽFŽ©•ìŽF", 0, 26); + x2("[[‚ЂÓ]]", "‚Ó", 0, 2); + x2("[[‚¢‚¨‚¤]‚©]", "‚©", 0, 2); + n("[[^‚ ]]", "‚ "); + n("[^[‚ ]]", "‚ "); + x2("[^[^‚ ]]", "‚ ", 0, 2); + x2("[[‚©‚«‚­]&&‚«‚­]", "‚­", 0, 2); + n("[[‚©‚«‚­]&&‚«‚­]", "‚©"); + n("[[‚©‚«‚­]&&‚«‚­]", "‚¯"); + x2("[‚ -‚ñ&&‚¢-‚ð&&‚¤-‚ï]", "‚ï", 0, 2); + n("[^‚ -‚ñ&&‚¢-‚ð&&‚¤-‚ï]", "‚ï"); + x2("[[^‚ &&‚ ]&&‚ -‚ñ]", "‚¢", 0, 2); + n("[[^‚ &&‚ ]&&‚ -‚ñ]", "‚ "); + x2("[[^‚ -‚ñ&&‚¢‚¤‚¦‚¨]&&[^‚¤-‚©]]", "‚«", 0, 2); + n("[[^‚ -‚ñ&&‚¢‚¤‚¦‚¨]&&[^‚¤-‚©]]", "‚¢"); + x2("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚¤", 0, 2); + x2("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚¦", 0, 2); + n("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚©"); + x2("[‚ -&&-‚ ]", "-", 0, 1); + x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]q-w]", "‚¦", 0, 2); + x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "f", 0, 1); + x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "g", 0, 1); + n("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "2"); + x2("aƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh<\\/b>", "aƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh", 0, 32); + x2(".ƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh<\\/b>", "aƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh", 0, 32); + fprintf(stdout, + "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + +#ifndef POSIX_TEST + onig_region_free(region, 1); + onig_end(); +#endif + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +} diff --git a/ext/mbstring/tests/mb_eregi_replace.phpt b/ext/mbstring/tests/mb_eregi_replace.phpt index dd6162c251..2f0776bc78 100644 --- a/ext/mbstring/tests/mb_eregi_replace.phpt +++ b/ext/mbstring/tests/mb_eregi_replace.phpt @@ -29,9 +29,9 @@ function do_translit($st) { return $st; } -echo do_translit("Фуцк"); +echo do_translit("Пеар"); ?> --EXPECT-- -Fuck +pear --CREDITS-- -Testfest Wuerzburg 2009-06-20 +Testfest Wuerzburg 2009-06-20 (modified by rui 2011-10-15) \ No newline at end of file -- 2.50.1